Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -4238,6 +4238,13 @@
   }
 #endif
 
+  // Update the largest vector width if any arguments have vector types.
+  for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
+    if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+  }
+
   // Compute the calling convention and attributes.
   unsigned CallingConv;
   llvm::AttributeList Attrs;
@@ -4318,6 +4325,11 @@
   if (!CI->getType()->isVoidTy())
     CI->setName("call");
 
+  // Update largest vector width from the return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
   // Insert instrumentation or attach profile metadata at indirect call sites.
   // For more details, see the comment before the definition of
   // IPVK_IndirectCallTarget in InstrProfData.inc.
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -430,7 +430,24 @@
     NormalCleanupDest = Address::invalid();
   }
 
-  // Add the required-vector-width attribute.
+  // Scan function arguments for vector width.
+  for (llvm::Argument &A : CurFn->args())
+    if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+
+  // Update vector width based on return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
+  // Add the required-vector-width attribute. This contains the max width from:
+  // 1. min-vector-width attribute used in the source program.
+  // 2. Any builtins used that have a vector width specified.
+  // 3. Values passed in and out of inline assembly.
+  // 4. Width of vector arguments and return types for this function.
+  // 5. Width of vector arguments and return types for functions called by
+  //    this function.
   if (LargestVectorWidth != 0)
     CurFn->addFnAttr("min-legal-vector-width",
                      llvm::utostr(LargestVectorWidth));
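Taken together, the two hunks above mean a function's "min-legal-vector-width" now reflects vectors that appear anywhere on its call boundaries, not just in its own prototype. A minimal C sketch of the intended effect (the names `callee`, `caller`, and `leaf` are hypothetical and not part of the patch; actual attribute-group numbering depends on the rest of the translation unit):

    /* 256-bit vector type, written with the GCC/Clang vector extension. */
    typedef int v8si __attribute__((vector_size(32)));

    v8si callee(v8si x); /* defined elsewhere */

    /* 'caller' has no vector parameters or return type of its own, but the
       CGCall.cpp hunk scans each IR call's arguments and result, so the
       256-bit values flowing into and out of 'callee' should still raise
       caller's "min-legal-vector-width" to 256. */
    int caller(v8si *p) {
      v8si r = callee(*p);
      return r[0];
    }

    /* 'leaf' makes no calls; the CodeGenFunction.cpp hunk derives the width
       directly from its own argument and return types instead. */
    v8si leaf(v8si x) { return x + x; }

The test updates below are the fallout of this: NEON q-form intrinsics now land in a different attribute group than their 64-bit counterparts.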
Index: test/CodeGen/aarch64-neon-3v.c
===================================================================
--- test/CodeGen/aarch64-neon-3v.c
+++ test/CodeGen/aarch64-neon-3v.c
@@ -11,7 +11,7 @@
   return vand_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[AND_I]]
 int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
@@ -25,7 +25,7 @@
   return vand_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[AND_I]]
 int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
@@ -39,7 +39,7 @@
   return vand_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[AND_I]]
 int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
@@ -53,7 +53,7 @@
   return vand_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[AND_I]]
 int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
@@ -67,7 +67,7 @@
   return vand_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[AND_I]]
 uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
@@ -81,7 +81,7 @@
   return vand_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[AND_I]]
 uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
@@ -95,7 +95,7 @@
   return vand_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[AND_I]]
 uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
@@ -109,7 +109,7 @@
   return vand_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[AND_I]]
 uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
@@ -123,7 +123,7 @@
   return vorr_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[OR_I]]
 int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
@@ -137,7 +137,7 @@
   return vorr_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[OR_I]]
 int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
@@ -151,7 +151,7 @@
   return vorr_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[OR_I]]
 int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
@@ -165,7 +165,7 @@
   return vorr_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[OR_I]]
 int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
@@ -179,7 +179,7 @@
   return vorr_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[OR_I]]
 uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
@@ -193,7 +193,7 @@
   return vorr_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[OR_I]]
 uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
@@ -207,7 +207,7 @@
   return vorr_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[OR_I]]
 uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
@@ -221,7 +221,7 @@
   return vorr_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[OR_I]]
 uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
@@ -235,7 +235,7 @@
   return veor_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[XOR_I]]
 int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
@@ -249,7 +249,7 @@
   return veor_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[XOR_I]]
 int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
@@ -263,7 +263,7 @@
   return veor_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[XOR_I]]
 int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
@@ -277,7 +277,7 @@
   return veor_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[XOR_I]]
 int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
@@ -291,7 +291,7 @@
   return veor_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
 // CHECK: ret <16 x i8> [[XOR_I]]
 uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
@@ -305,7 +305,7 @@
   return veor_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
 // CHECK: ret <8 x i16> [[XOR_I]]
 uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
@@ -319,7 +319,7 @@
   return veor_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
 // CHECK: ret <4 x i32> [[XOR_I]]
 uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
@@ -333,7 +333,7 @@
   return veor_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
 // CHECK: ret <2 x i64> [[XOR_I]]
 uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
@@ -348,7 +348,7 @@
   return vbic_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[AND_I]]
@@ -364,7 +364,7 @@
   return vbic_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[AND_I]]
@@ -380,7 +380,7 @@
   return vbic_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[AND_I]]
@@ -396,7 +396,7 @@
   return vbic_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[AND_I]]
@@ -412,7 +412,7 @@
   return vbic_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[AND_I]]
@@ -428,7 +428,7 @@
   return vbic_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[AND_I]]
@@ -444,7 +444,7 @@
   return vbic_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[AND_I]]
@@ -460,7 +460,7 @@
   return vbic_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[AND_I]]
@@ -476,7 +476,7 @@
   return vorn_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[OR_I]]
@@ -492,7 +492,7 @@
   return vorn_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[OR_I]]
@@ -508,7 +508,7 @@
   return vorn_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[OR_I]]
@@ -524,7 +524,7 @@
   return vorn_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[OR_I]]
@@ -540,7 +540,7 @@
   return vorn_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b,
 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK: ret <16 x i8> [[OR_I]]
@@ -556,7 +556,7 @@
   return vorn_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b,
 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK: ret <8 x i16> [[OR_I]]
@@ -572,7 +572,7 @@
   return vorn_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b,
 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK: ret <4 x i32> [[OR_I]]
@@ -588,10 +588,13 @@
   return vorn_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b,
 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK: ret <2 x i64> [[OR_I]]
 uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
   return vornq_u64(a, b);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
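The two trailing CHECK lines are the pattern each updated test file now ends with. Read as a sketch (the #0/#1 numbers are simply whichever attribute groups Clang happens to emit for this file, not stable identifiers): intrinsics that stay within 64-bit D registers keep a group carrying "min-legal-vector-width"="64", while the q-form intrinsics move to a group carrying "min-legal-vector-width"="128". A hypothetical pair mirroring the tests above (`narrow` and `wide` are illustrative names, not from the patch):

    #include <arm_neon.h>

    /* Only 64-bit vectors involved: expected to land in an attribute group
       with "min-legal-vector-width"="64". */
    int8x8_t narrow(int8x8_t a, int8x8_t b) { return vand_s8(a, b); }

    /* 128-bit vectors in and out: expected to land in a group with
       "min-legal-vector-width"="128". */
    int8x16_t wide(int8x16_t a, int8x16_t b) { return vandq_s8(a, b); }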
Index: test/CodeGen/aarch64-neon-across.c
===================================================================
--- test/CodeGen/aarch64-neon-across.c
+++ test/CodeGen/aarch64-neon-across.c
@@ -6,7 +6,7 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 int16_t test_vaddlv_s8(int8x8_t a) {
@@ -14,14 +14,14 @@
 }
 
 // CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 int32_t test_vaddlv_s16(int16x4_t a) {
   return vaddlv_s16(a);
 }
 
 // CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 uint16_t test_vaddlv_u8(uint8x8_t a) {
@@ -29,58 +29,58 @@
 }
 
 // CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 uint32_t test_vaddlv_u16(uint16x4_t a) {
   return vaddlv_u16(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 int16_t test_vaddlvq_s8(int8x16_t a) {
   return vaddlvq_s8(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 int32_t test_vaddlvq_s16(int16x8_t a) {
   return vaddlvq_s16(a);
 }
 
-// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #3
 // CHECK: ret i64 [[VADDLVQ_S32_I]]
 int64_t test_vaddlvq_s32(int32x4_t a) {
   return vaddlvq_s32(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK: ret i16 [[TMP0]]
 uint16_t test_vaddlvq_u8(uint8x16_t a) {
   return vaddlvq_u8(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: ret i32 [[VADDLV_I]]
 uint32_t test_vaddlvq_u16(uint16x8_t a) {
   return vaddlvq_u16(a);
 }
 
-// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #3
 // CHECK: ret i64 [[VADDLVQ_U32_I]]
 uint64_t test_vaddlvq_u32(uint32x4_t a) {
   return vaddlvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vmaxv_s8(int8x8_t a) {
@@ -88,7 +88,7 @@
 }
 
 // CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vmaxv_s16(int16x4_t a) {
@@ -96,7 +96,7 @@
 }
 
 // CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vmaxv_u8(uint8x8_t a) {
@@ -104,61 +104,61 @@
 }
 
 // CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vmaxv_u16(uint16x4_t a) {
   return vmaxv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vmaxvq_s8(int8x16_t a) {
   return vmaxvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vmaxvq_s16(int16x8_t a) {
   return vmaxvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMAXVQ_S32_I]]
 int32_t test_vmaxvq_s32(int32x4_t a) {
   return vmaxvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vmaxvq_u8(uint8x16_t a) {
   return vmaxvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vmaxvq_u16(uint16x8_t a) {
   return vmaxvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMAXVQ_U32_I]]
 uint32_t test_vmaxvq_u32(uint32x4_t a) {
   return vmaxvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vminv_s8(int8x8_t a) {
@@ -166,7 +166,7 @@
 }
 
 // CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vminv_s16(int16x4_t a) {
@@ -174,7 +174,7 @@
 }
 
 // CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vminv_u8(uint8x8_t a) {
@@ -182,61 +182,61 @@
 }
 
 // CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vminv_u16(uint16x4_t a) {
   return vminv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vminvq_s8(int8x16_t a) {
   return vminvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vminvq_s16(int16x8_t a) {
   return vminvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMINVQ_S32_I]]
 int32_t test_vminvq_s32(int32x4_t a) {
   return vminvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vminvq_u8(uint8x16_t a) {
   return vminvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vminvq_u16(uint16x8_t a) {
   return vminvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VMINVQ_U32_I]]
 uint32_t test_vminvq_u32(uint32x4_t a) {
   return vminvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vaddv_s8(int8x8_t a) {
@@ -244,7 +244,7 @@
 }
 
 // CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vaddv_s16(int16x4_t a) {
@@ -252,7 +252,7 @@
 }
 
 // CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vaddv_u8(uint8x8_t a) {
@@ -260,83 +260,86 @@
 }
 
 // CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vaddv_u16(uint16x4_t a) {
   return vaddv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 int8_t test_vaddvq_s8(int8x16_t a) {
   return vaddvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 int16_t test_vaddvq_s16(int16x8_t a) {
   return vaddvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #1 {
+// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VADDVQ_S32_I]]
 int32_t test_vaddvq_s32(int32x4_t a) {
   return vaddvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #3
 // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK: ret i8 [[TMP0]]
 uint8_t test_vaddvq_u8(uint8x16_t a) {
   return vaddvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #1 {
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #3
 // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK: ret i16 [[TMP2]]
 uint16_t test_vaddvq_u16(uint16x8_t a) {
   return vaddvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #1 {
+// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #3
 // CHECK: ret i32 [[VADDVQ_U32_I]]
 uint32_t test_vaddvq_u32(uint32x4_t a) {
   return vaddvq_u32(a);
 }
 
-// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMAXVQ_F32_I]]
 float32_t test_vmaxvq_f32(float32x4_t a) {
   return vmaxvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMINVQ_F32_I]]
 float32_t test_vminvq_f32(float32x4_t a) {
   return vminvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMAXNMVQ_F32_I]]
 float32_t test_vmaxnmvq_f32(float32x4_t a) {
   return vmaxnmvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #0 {
-// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #1 {
+// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #3
 // CHECK: ret float [[VMINNMVQ_F32_I]]
 float32_t test_vminnmvq_f32(float32x4_t a) {
   return vminnmvq_f32(a);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
Index: test/CodeGen/aarch64-neon-fma.c
===================================================================
--- test/CodeGen/aarch64-neon-fma.c
+++ test/CodeGen/aarch64-neon-fma.c
@@ -14,7 +14,7 @@
   return vmla_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -26,7 +26,7 @@
   return vmlaq_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -36,7 +36,7 @@
   return vmlaq_n_f64(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -58,7 +58,7 @@
   return vmls_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -77,7 +77,7 @@
   return vmla_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -86,7 +86,7 @@
   return vmlaq_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -95,7 +95,7 @@
   return vmla_laneq_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -113,7 +113,7 @@
   return vmls_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -122,7 +122,7 @@
   return vmlsq_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -131,7 +131,7 @@
   return vmls_laneq_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -149,7 +149,7 @@
   return vmla_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -158,7 +158,7 @@
   return vmlaq_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -167,7 +167,7 @@
   return vmla_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -185,7 +185,7 @@
   return vmls_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -193,7 +193,7 @@
 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vmlsq_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -202,7 +202,7 @@
   return vmls_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -211,7 +211,7 @@
   return vmlsq_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> [[VECINIT1_I]], <2 x double> %a)
@@ -220,12 +220,15 @@
   return vfmaq_n_f64(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
-// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #2
+// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #3
 // CHECK: ret <2 x double> [[TMP6]]
 float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
   return vfmsq_n_f64(a, b, c);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
Index: test/CodeGen/aarch64-neon-ldst-one.c
===================================================================
--- test/CodeGen/aarch64-neon-ldst-one.c
+++ test/CodeGen/aarch64-neon-ldst-one.c
@@ -152,7 +152,7 @@
   return vld1q_dup_p64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -161,7 +161,7 @@
   return vld1_dup_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -172,7 +172,7 @@
   return vld1_dup_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
@@ -183,7 +183,7 @@
   return vld1_dup_u32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -194,7 +194,7 @@
   return vld1_dup_u64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -203,7 +203,7 @@
   return vld1_dup_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -214,7 +214,7 @@
   return vld1_dup_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
@@ -225,7 +225,7 @@
   return vld1_dup_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -236,7 +236,7 @@
   return vld1_dup_s64(a);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
+// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
 // CHECK: [[TMP2:%.*]] = load half, half* [[TMP1]]
@@ -247,7 +247,7 @@
   return vld1_dup_f16(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 {
+// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
 // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]]
@@ -258,7 +258,7 @@
   return vld1_dup_f32(a);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vld1_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define <1 x double> @test_vld1_dup_f64(double* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to double*
 // CHECK: [[TMP2:%.*]] = load double, double* [[TMP1]]
@@ -269,7 +269,7 @@
   return vld1_dup_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -278,7 +278,7 @@
   return vld1_dup_p8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -289,7 +289,7 @@
   return vld1_dup_p16(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_p64(i64* %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -300,7 +300,7 @@
   return vld1_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
@@ -318,7 +318,7 @@
   return vld2q_dup_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
@@ -336,7 +336,7 @@
   return vld2q_dup_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
@@ -354,7 +354,7 @@
   return vld2q_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@@ -372,7 +372,7 @@
   return vld2q_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
@@ -390,7 +390,7 @@
   return vld2_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@@ -408,7 +408,7 @@
   return vld2_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
@@ -427,7 +427,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
@@ -446,7 +446,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
@@ -465,7 +465,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
@@ -484,7 +484,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
@@ -503,7 +503,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
@@ -522,7 +522,7 @@
 // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
@@ -540,7 +540,7 @@
   return vld4q_dup_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
@@ -558,7 +558,7 @@
   return vld4q_dup_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
@@ -576,7 +576,7 @@
   return vld4q_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
@@ -594,7 +594,7 @@
   return vld4q_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
@@ -612,7 +612,7 @@
   return vld4_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
@@ -786,7 +786,7 @@
   return vld1q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK: ret <8 x i8> [[VLD1_LANE]]
@@ -794,7 +794,7 @@
   return vld1_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -806,7 +806,7 @@
   return vld1_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -818,7 +818,7 @@
   return vld1_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -830,7 +830,7 @@
   return vld1_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK: ret <8 x i8> [[VLD1_LANE]]
@@ -838,7 +838,7 @@
   return vld1_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -850,7 +850,7 @@
   return vld1_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -862,7 +862,7 @@
   return vld1_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -874,7 +874,7 @@
   return vld1_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
+// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
@@ -886,7 +886,7 @@
   return vld1_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 {
+// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
@@ -898,7 +898,7 @@
   return vld1_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
@@ -910,7 +910,7 @@
   return vld1_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #1 {
 // CHECK: [[TMP0:%.*]] = load i8, i8* %a
 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK: ret <8 x i8> [[VLD1_LANE]]
@@ -918,7 +918,7 @@
   return vld1_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -930,7 +930,7 @@
   return vld1_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_p64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_p64(i64* %a, <1 x i64> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -942,7 +942,7 @@
   return vld1_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -971,7 +971,7 @@
   return vld2q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -1000,7 +1000,7 @@
   return vld2q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -1029,7 +1029,7 @@
   return vld2q_lane_p8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
@@ -1061,7 +1061,7 @@
   return vld3q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
@@ -1093,7 +1093,7 @@
   return vld3q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -1127,7 +1127,7 @@
   return vld2q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -1161,7 +1161,7 @@
   return vld2q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
@@ -1195,7 +1195,7 @@
   return vld2q_lane_u64(a, b, 1);
 }
 
-//
CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 @@ -1229,7 +1229,7 @@ return vld2q_lane_s16(a, b, 7); } -// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 @@ -1263,7 +1263,7 @@ return vld2q_lane_s32(a, b, 3); } -// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 @@ -1297,7 +1297,7 @@ return vld2q_lane_s64(a, b, 1); } -// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 @@ -1331,7 +1331,7 @@ return vld2q_lane_f16(a, b, 7); } -// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 @@ -1365,7 +1365,7 @@ return vld2q_lane_f32(a, b, 3); } -// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 @@ -1399,7 +1399,7 @@ return vld2q_lane_f64(a, b, 1); } -// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 @@ -1433,7 +1433,7 @@ return vld2q_lane_p16(a, b, 7); } -// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[B:%.*]] = alloca 
%struct.poly64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 @@ -1467,7 +1467,7 @@ return vld2q_lane_p64(a, b, 1); } -// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 @@ -1496,7 +1496,7 @@ return vld2_lane_u8(a, b, 7); } -// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 @@ -1530,7 +1530,7 @@ return vld2_lane_u16(a, b, 3); } -// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 @@ -1564,7 +1564,7 @@ return vld2_lane_u32(a, b, 1); } -// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 @@ -1598,7 +1598,7 @@ return vld2_lane_u64(a, b, 0); } -// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 @@ -1627,7 +1627,7 @@ return vld2_lane_s8(a, b, 7); } -// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 @@ -1661,7 +1661,7 @@ return vld2_lane_s16(a, b, 3); } -// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 @@ -1695,7 +1695,7 @@ return vld2_lane_s32(a, b, 1); } -// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 // 
CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 @@ -1729,7 +1729,7 @@ return vld2_lane_s64(a, b, 0); } -// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 @@ -1763,7 +1763,7 @@ return vld2_lane_f16(a, b, 3); } -// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 @@ -1797,7 +1797,7 @@ return vld2_lane_f32(a, b, 1); } -// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 @@ -1831,7 +1831,7 @@ return vld2_lane_f64(a, b, 0); } -// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 @@ -1860,7 +1860,7 @@ return vld2_lane_p8(a, b, 7); } -// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 @@ -1894,7 +1894,7 @@ return vld2_lane_p16(a, b, 3); } -// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 @@ -1928,7 +1928,7 @@ return vld2_lane_p64(a, b, 0); } -// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 @@ -1967,7 +1967,7 @@ return vld3q_lane_u16(a, b, 7); } -// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x 
<4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 @@ -2006,7 +2006,7 @@ return vld3q_lane_u32(a, b, 3); } -// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 @@ -2045,7 +2045,7 @@ return vld3q_lane_u64(a, b, 1); } -// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 @@ -2084,7 +2084,7 @@ return vld3q_lane_s16(a, b, 7); } -// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 @@ -2123,7 +2123,7 @@ return vld3q_lane_s32(a, b, 3); } -// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 @@ -2162,7 +2162,7 @@ return vld3q_lane_s64(a, b, 1); } -// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 @@ -2201,7 +2201,7 @@ return vld3q_lane_f16(a, b, 7); } -// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 @@ -2240,7 +2240,7 @@ return vld3q_lane_f32(a, b, 3); } -// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 @@ -2279,7 +2279,7 @@ return vld3q_lane_f64(a, b, 1); } -// CHECK-LABEL: define %struct.poly8x16x3_t 
@test_vld3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 @@ -2311,7 +2311,7 @@ return vld3q_lane_p8(a, b, 15); } -// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 @@ -2350,7 +2350,7 @@ return vld3q_lane_p16(a, b, 7); } -// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 @@ -2389,7 +2389,7 @@ return vld3q_lane_p64(a, b, 1); } -// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 @@ -2421,7 +2421,7 @@ return vld3_lane_u8(a, b, 7); } -// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 @@ -2460,7 +2460,7 @@ return vld3_lane_u16(a, b, 3); } -// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 @@ -2499,7 +2499,7 @@ return vld3_lane_u32(a, b, 1); } -// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 @@ -2538,7 +2538,7 @@ return vld3_lane_u64(a, b, 0); } -// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 @@ -2570,7 +2570,7 @@ return vld3_lane_s8(a, b, 7); 
} -// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 @@ -2609,7 +2609,7 @@ return vld3_lane_s16(a, b, 3); } -// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 @@ -2648,7 +2648,7 @@ return vld3_lane_s32(a, b, 1); } -// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 @@ -2687,7 +2687,7 @@ return vld3_lane_s64(a, b, 0); } -// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 @@ -2726,7 +2726,7 @@ return vld3_lane_f16(a, b, 3); } -// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 @@ -2765,7 +2765,7 @@ return vld3_lane_f32(a, b, 1); } -// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 @@ -2804,7 +2804,7 @@ return vld3_lane_f64(a, b, 0); } -// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 @@ -2836,7 +2836,7 @@ return vld3_lane_p8(a, b, 7); } -// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca 
%struct.poly16x4x3_t, align 8 @@ -2875,7 +2875,7 @@ return vld3_lane_p16(a, b, 3); } -// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 @@ -2914,7 +2914,7 @@ return vld3_lane_p64(a, b, 0); } -// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 @@ -2949,7 +2949,7 @@ return vld4q_lane_u8(a, b, 15); } -// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 @@ -2993,7 +2993,7 @@ return vld4q_lane_u16(a, b, 7); } -// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 @@ -3037,7 +3037,7 @@ return vld4q_lane_u32(a, b, 3); } -// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 @@ -3081,7 +3081,7 @@ return vld4q_lane_u64(a, b, 1); } -// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 @@ -3116,7 +3116,7 @@ return vld4q_lane_s8(a, b, 15); } -// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 @@ -3160,7 +3160,7 @@ return vld4q_lane_s16(a, b, 7); } -// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: 
[[B:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 @@ -3204,7 +3204,7 @@ return vld4q_lane_s32(a, b, 3); } -// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 @@ -3248,7 +3248,7 @@ return vld4q_lane_s64(a, b, 1); } -// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 @@ -3292,7 +3292,7 @@ return vld4q_lane_f16(a, b, 7); } -// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 @@ -3336,7 +3336,7 @@ return vld4q_lane_f32(a, b, 3); } -// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 @@ -3380,7 +3380,7 @@ return vld4q_lane_f64(a, b, 1); } -// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 @@ -3415,7 +3415,7 @@ return vld4q_lane_p8(a, b, 15); } -// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 @@ -3459,7 +3459,7 @@ return vld4q_lane_p16(a, b, 7); } -// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 @@ -3503,7 +3503,7 @@ return vld4q_lane_p64(a, b, 1); } -// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint8x8x4_t 
@test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 @@ -3538,7 +3538,7 @@ return vld4_lane_u8(a, b, 7); } -// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 @@ -3582,7 +3582,7 @@ return vld4_lane_u16(a, b, 3); } -// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 @@ -3626,7 +3626,7 @@ return vld4_lane_u32(a, b, 1); } -// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 @@ -3670,7 +3670,7 @@ return vld4_lane_u64(a, b, 0); } -// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 @@ -3705,7 +3705,7 @@ return vld4_lane_s8(a, b, 7); } -// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 @@ -3749,7 +3749,7 @@ return vld4_lane_s16(a, b, 3); } -// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 @@ -3793,7 +3793,7 @@ return vld4_lane_s32(a, b, 1); } -// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 @@ -3837,7 +3837,7 @@ return vld4_lane_s64(a, b, 0); } -// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: 
define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
@@ -3881,7 +3881,7 @@
return vld4_lane_f16(a, b, 3);
}

-// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
@@ -3925,7 +3925,7 @@
return vld4_lane_f32(a, b, 1);
}

-// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
@@ -3969,7 +3969,7 @@
return vld4_lane_f64(a, b, 0);
}

-// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
@@ -4004,7 +4004,7 @@
return vld4_lane_p8(a, b, 7);
}

-// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
@@ -4048,7 +4048,7 @@
return vld4_lane_p16(a, b, 3);
}

-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
@@ -4248,7 +4248,7 @@
vst1q_lane_p64(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #1 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a
// CHECK: ret void
@@ -4256,7 +4256,7 @@
vst1_lane_u8(a, b, 7);
}

-// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -4268,7 +4268,7 @@
vst1_lane_u16(a, b, 3);
}

-// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -4280,7 +4280,7 @@
vst1_lane_u32(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -4292,7 +4292,7 @@
vst1_lane_u64(a, b, 0);
}

-// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #1 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a
// CHECK: ret void
@@ -4300,7 +4300,7 @@
vst1_lane_s8(a, b, 7);
}

-// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -4312,7 +4312,7 @@
vst1_lane_s16(a, b, 3);
}

-// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -4324,7 +4324,7 @@
vst1_lane_s32(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -4336,7 +4336,7 @@
vst1_lane_s64(a, b, 0);
}

-// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
@@ -4348,7 +4348,7 @@
vst1_lane_f16(a, b, 3);
}

-// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
@@ -4360,7 +4360,7 @@
vst1_lane_f32(a, b, 1);
}

-// CHECK-LABEL: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
@@ -4372,7 +4372,7 @@
vst1_lane_f64(a, b, 0);
}

-// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #1 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a
// CHECK: ret void
@@ -4380,7 +4380,7 @@
vst1_lane_p8(a, b, 7);
}

-// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] =
bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -4392,7 +4392,7 @@ vst1_lane_p16(a, b, 3); } -// CHECK-LABEL: define void @test_vst1_lane_p64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: define void @test_vst1_lane_p64(i64* %a, <1 x i64> %b) #1 { // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -4404,7 +4404,7 @@ vst1_lane_p64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2q_lane_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -4424,7 +4424,7 @@ vst2q_lane_u8(a, b, 15); } -// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 @@ -4449,7 +4449,7 @@ vst2q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 @@ -4474,7 +4474,7 @@ vst2q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 @@ -4499,7 +4499,7 @@ vst2q_lane_u64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 @@ -4519,7 +4519,7 @@ vst2q_lane_s8(a, b, 15); } -// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 @@ -4544,7 +4544,7 @@ vst2q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] 
%b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 @@ -4569,7 +4569,7 @@ vst2q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 @@ -4594,7 +4594,7 @@ vst2q_lane_s64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 @@ -4619,7 +4619,7 @@ vst2q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 @@ -4644,7 +4644,7 @@ vst2q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 @@ -4669,7 +4669,7 @@ vst2q_lane_f64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2q_lane_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -4689,7 +4689,7 @@ vst2q_lane_p8(a, b, 15); } -// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 @@ -4714,7 +4714,7 @@ vst2q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, 
%struct.poly64x2x2_t* [[B]], i32 0, i32 0 @@ -4739,7 +4739,7 @@ vst2q_lane_p64(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 @@ -4759,7 +4759,7 @@ vst2_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 @@ -4784,7 +4784,7 @@ vst2_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 @@ -4809,7 +4809,7 @@ vst2_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 @@ -4834,7 +4834,7 @@ vst2_lane_u64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 @@ -4854,7 +4854,7 @@ vst2_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 @@ -4879,7 +4879,7 @@ vst2_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 @@ -4904,7 +4904,7 @@ vst2_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca 
%struct.int64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 @@ -4929,7 +4929,7 @@ vst2_lane_s64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 @@ -4954,7 +4954,7 @@ vst2_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 @@ -4979,7 +4979,7 @@ vst2_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 @@ -5004,7 +5004,7 @@ vst2_lane_f64(a, b, 0); } -// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 @@ -5024,7 +5024,7 @@ vst2_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 @@ -5049,7 +5049,7 @@ vst2_lane_p16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0 @@ -5074,7 +5074,7 @@ vst2_lane_p64(a, b, 0); } -// CHECK-LABEL: define void @test_vst3q_lane_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: define void @test_vst3q_lane_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 { // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -5097,7 +5097,7 @@ vst3q_lane_u8(a, b, 15); 
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
@@ -5127,7 +5127,7 @@
   vst3q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
@@ -5157,7 +5157,7 @@
   vst3q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
@@ -5187,7 +5187,7 @@
   vst3q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
@@ -5210,7 +5210,7 @@
   vst3q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
@@ -5240,7 +5240,7 @@
   vst3q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
@@ -5270,7 +5270,7 @@
   vst3q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
@@ -5300,7 +5300,7 @@
   vst3q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
@@ -5330,7 +5330,7 @@
   vst3q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
@@ -5360,7 +5360,7 @@
   vst3q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
@@ -5390,7 +5390,7 @@
   vst3q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
@@ -5413,7 +5413,7 @@
   vst3q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
@@ -5443,7 +5443,7 @@
   vst3q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
@@ -5473,7 +5473,7 @@
   vst3q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
@@ -5496,7 +5496,7 @@
   vst3_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
@@ -5526,7 +5526,7 @@
   vst3_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
@@ -5556,7 +5556,7 @@
   vst3_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
@@ -5586,7 +5586,7 @@
   vst3_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
@@ -5609,7 +5609,7 @@
   vst3_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
@@ -5639,7 +5639,7 @@
   vst3_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
@@ -5669,7 +5669,7 @@
   vst3_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
@@ -5699,7 +5699,7 @@
   vst3_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
@@ -5729,7 +5729,7 @@
   vst3_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
@@ -5759,7 +5759,7 @@
   vst3_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
@@ -5789,7 +5789,7 @@
   vst3_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
@@ -5812,7 +5812,7 @@
   vst3_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
@@ -5842,7 +5842,7 @@
   vst3_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
@@ -5872,7 +5872,7 @@
   vst3_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
@@ -5898,7 +5898,7 @@
   vst4q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
@@ -5933,7 +5933,7 @@
   vst4q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
@@ -5968,7 +5968,7 @@
   vst4q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
@@ -6003,7 +6003,7 @@
   vst4q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@@ -6029,7 +6029,7 @@
   vst4q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
@@ -6064,7 +6064,7 @@
   vst4q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
@@ -6099,7 +6099,7 @@
   vst4q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
@@ -6134,7 +6134,7 @@
   vst4q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
@@ -6169,7 +6169,7 @@
   vst4q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
@@ -6204,7 +6204,7 @@
   vst4q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
@@ -6239,7 +6239,7 @@
   vst4q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@@ -6265,7 +6265,7 @@
   vst4q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
@@ -6300,7 +6300,7 @@
   vst4q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
@@ -6335,7 +6335,7 @@
   vst4q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
@@ -6361,7 +6361,7 @@
   vst4_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
@@ -6396,7 +6396,7 @@
   vst4_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
@@ -6431,7 +6431,7 @@
   vst4_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
@@ -6466,7 +6466,7 @@
   vst4_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
@@ -6492,7 +6492,7 @@
   vst4_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
@@ -6527,7 +6527,7 @@
   vst4_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
@@ -6562,7 +6562,7 @@
   vst4_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
@@ -6597,7 +6597,7 @@
   vst4_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
@@ -6632,7 +6632,7 @@
   vst4_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
@@ -6667,7 +6667,7 @@
   vst4_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
@@ -6702,7 +6702,7 @@
   vst4_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
@@ -6728,7 +6728,7 @@
   vst4_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
@@ -6763,7 +6763,7 @@
   vst4_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
@@ -6797,3 +6797,7 @@
 void test_vst4_lane_p64(poly64_t *a, poly64x1x4_t b) {
   vst4_lane_p64(a, b, 0);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"
+// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width"
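The three groups pinned down by these new checks mirror what the width bookkeeping sees in this file: #0 collects the definitions whose IR signatures carry 128-bit vectors, #1 the purely 64-bit ones, and the vst*_lane wrappers end up in #2 with no width attribute at all, because their aggregate parameters are coerced to arrays of vectors ([2 x <4 x half>], [3 x <16 x i8>], and so on), and an array is not an IR vector type. A minimal sketch of the three cases, assuming an aarch64 target and function names invented for illustration, not taken from the test suite:

    #include <arm_neon.h>

    /* <16 x i8> in and out -> expect "min-legal-vector-width"="128" */
    uint8x16_t pass_q(uint8x16_t v) { return v; }

    /* <8 x i8> in and out -> expect "min-legal-vector-width"="64" */
    uint8x8_t pass_d(uint8x8_t v) { return v; }

    /* float32x2x2_t arrives as [2 x <2 x float>], an array rather than a
       vector, so the signature scan records no width for this definition. */
    void store_two(float32_t *p, float32x2x2_t v) { vst2_f32(p, v); }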
Index: test/CodeGen/aarch64-neon-scalar-copy.c
===================================================================
--- test/CodeGen/aarch64-neon-scalar-copy.c
+++ test/CodeGen/aarch64-neon-scalar-copy.c
@@ -23,7 +23,7 @@
 }
 
 
-// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -33,7 +33,7 @@
 }
 
 
-// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -118,7 +118,7 @@
   return vdupd_lane_u64(a, 0);
 }
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #1 {
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK: ret i8 [[VGETQ_LANE]]
 int8_t test_vdupb_laneq_s8(int8x16_t a) {
@@ -126,7 +126,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -136,7 +136,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -146,7 +146,7 @@
 }
 
 
-// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -156,7 +156,7 @@
 }
 
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #1 {
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK: ret i8 [[VGETQ_LANE]]
 uint8_t test_vdupb_laneq_u8(uint8x16_t a) {
@@ -164,7 +164,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -174,7 +174,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -184,7 +184,7 @@
 }
 
 
-// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -209,14 +209,14 @@
   return vduph_lane_p16(a, 3);
 }
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #1 {
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK: ret i8 [[VGETQ_LANE]]
 poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
   return vdupb_laneq_p8(a, 15);
 }
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -225,3 +225,5 @@
   return vduph_laneq_p16(a, 7);
 }
 
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
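What the renumbering in this file encodes is that the width requirement comes from the argument, not the result: every _laneq_ test above returns a scalar, yet its <16 x i8>/<8 x i16>/<4 x i32>/<2 x i64> parameter moves the definition from group #0 into the 128-bit group #1, while the _lane_ variants on 64-bit vectors stay at "64". A minimal sketch, assuming an aarch64 target and a function name invented for illustration:

    #include <arm_neon.h>

    /* Scalar return type, but the parameter is <16 x i8> in IR, so the
       definition should still carry "min-legal-vector-width"="128". */
    int8_t last_lane(int8x16_t v) {
      return vgetq_lane_s8(v, 15);
    }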
Index: test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
===================================================================
--- test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
+++ test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
@@ -26,7 +26,7 @@
   return vmuld_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -36,7 +36,7 @@
   return vmuls_laneq_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -65,7 +65,7 @@
   return vmulxs_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -85,7 +85,7 @@
   return vmulxd_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -112,7 +112,7 @@
 }
 
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@@ -128,7 +128,7 @@
   return vmulx_laneq_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@@ -165,7 +165,7 @@
   return vfmad_lane_f64(a, b, c, 0);
 }
 
-// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #0 {
+// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -215,7 +215,7 @@
   return vfms_lane_f64(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
@@ -230,7 +230,7 @@
   return vfma_laneq_f64(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
 // CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
@@ -269,7 +269,7 @@
   return vqdmulls_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -282,7 +282,7 @@
   return vqdmullh_laneq_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -316,7 +316,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -330,7 +330,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -364,7 +364,7 @@
 }
 
 
-// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -378,7 +378,7 @@
 }
 
 
-// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -413,7 +413,7 @@
   return vqdmlals_lane_s32(a, b, c, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
+// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -427,7 +427,7 @@
   return vqdmlalh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
+// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -463,7 +463,7 @@
   return vqdmlsls_lane_s32(a, b, c, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
+// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -477,7 +477,7 @@
   return vqdmlslh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
+// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -513,7 +513,7 @@
   return result;
 }
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 {
 // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
 // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32>
@@ -540,3 +540,6 @@
   result = vmulx_laneq_f64(arg1, arg3, 1);
   return result;
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
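In the tbl/tbx file that follows, the renumbering happens on two levels at once: the function attribute groups split by width (#0 stays the 64-bit group, the q-register definitions move to #1), and the extra group pushes the attribute set used at the intrinsic call sites from #2 up to #3. The call-site group itself says nothing about vector width; it is presumably the usual nounwind-style set, which is why only its index changes. A minimal sketch of a function expected to land in the 128-bit group, assuming an aarch64 target and a function name invented for illustration:

    #include <arm_neon.h>

    /* Parameters and result are <16 x i8> in IR, so the definition should
       end up with "min-legal-vector-width"="128"; the vqtbl1q_s8 call inside
       lowers to the @llvm.aarch64.neon.tbl1.v16i8 intrinsic checked below. */
    int8x16_t lookup_q(int8x16_t table, uint8x16_t index) {
      return vqtbl1q_s8(table, index);
    }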
Index: test/CodeGen/aarch64-neon-tbl.c
===================================================================
--- test/CodeGen/aarch64-neon-tbl.c
+++ test/CodeGen/aarch64-neon-tbl.c
@@ -7,14 +7,14 @@
 
 // CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL11_I]]
 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
   return vtbl1_s8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #0 {
-// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #1 {
+// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL1_I]]
 int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
   return vqtbl1_s8(a, b);
@@ -36,7 +36,7 @@
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
-// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL13_I]]
 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
   return vtbl2_s8(a, b);
@@ -57,7 +57,7 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
+// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL2_I]]
 int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
   return vqtbl2_s8(a, b);
@@ -83,7 +83,7 @@
 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL26_I]]
 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
   return vtbl3_s8(a, b);
@@ -107,7 +107,7 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
+// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL3_I]]
 int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
   return vqtbl3_s8(a, b);
@@ -136,7 +136,7 @@
 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32>
-// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL28_I]]
 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
   return vtbl4_s8(a, b);
@@ -163,20 +163,20 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
+// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL4_I]]
 int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
   return vqtbl4_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
-// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #1 {
+// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL1_I]]
 int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
   return vqtbl1q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
@@ -191,13 +191,13 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
+// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL2_I]]
 int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
   return vqtbl2q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
@@ -215,13 +215,13 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
+// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL3_I]]
 int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
   return vqtbl3q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
@@ -242,7 +242,7 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
+// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
 // CHECK: ret <16 x i8> [[VTBL4_I]]
 int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
   return vqtbl4q_s8(a, b);
@@ -250,7 +250,7 @@
 
 // CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
+// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
 // CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c,
 // CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@@ -278,7 +278,7 @@
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
-// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
+// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX13_I]]
 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
   return vtbx2_s8(a, b, c);
@@ -304,7 +304,7 @@
 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
+// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
 // CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c,
 // CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@@ -339,14 +339,14 @@
 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
 // CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32>
-// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
+// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX28_I]]
 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
   return vtbx4_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
-// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
+// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX1_I]]
 int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
   return vqtbx1_s8(a, b, c);
@@ -367,7 +367,7 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
+// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX2_I]]
 int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
   return vqtbx2_s8(a, b, c);
@@ -391,7 +391,7 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
+// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX3_I]]
 int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
   return vqtbx3_s8(a, b, c);
@@ -418,20 +418,20 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
+// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
 // CHECK: ret <8 x i8> [[VTBX4_I]]
 int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
   return vqtbx4_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
-// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
+// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX1_I]]
 int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
   return vqtbx1q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
@@ -446,13 +446,13 @@
 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
+// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX2_I]]
 int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
   return vqtbx2q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
@@ -470,13 +470,13 @@
 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
+// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX3_I]]
 int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
   return vqtbx3q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@@ -497,7 +497,7 @@
 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
+// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
 // CHECK: ret <16 x i8> [[VTBX4_I]]
 int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
   return vqtbx4q_s8(a, b, c);
@@ -505,14 +505,14 @@
 
 // CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32>
-// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL11_I]]
 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
   return vtbl1_u8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #0 {
-// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #1 {
+// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
 // CHECK: ret <8 x i8> [[VTBL1_I]]
 uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
   return vqtbl1_u8(a, b);
@@ -534,7 +534,7 @@
 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
-// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
@llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL13_I]] uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { return vtbl2_u8(a, b); @@ -555,7 +555,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL2_I]] uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) { return vqtbl2_u8(a, b); @@ -581,7 +581,7 @@ // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL26_I]] uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { return vtbl3_u8(a, b); @@ -605,7 +605,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2 +// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL3_I]] uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) { return vqtbl3_u8(a, b); @@ -634,7 +634,7 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL28_I]] uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { return vtbl4_u8(a, b); @@ -661,20 +661,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 
x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL4_I]] uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) { return vqtbl4_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #0 { -// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #1 { +// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL1_I]] uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) { return vqtbl1q_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0 @@ -689,13 +689,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL2_I]] uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) { return vqtbl2q_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0 @@ -713,13 +713,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2 +// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL3_I]] uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) { return vqtbl3q_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0 @@ -740,7 +740,7 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds 
%struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL4_I]] uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { return vqtbl4q_u8(a, b); @@ -748,7 +748,7 @@ // CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3 // CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a @@ -776,7 +776,7 @@ // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> -// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX13_I]] uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { return vtbx2_u8(a, b, c); @@ -802,7 +802,7 @@ // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3 // CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a @@ -837,14 +837,14 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX28_I]] uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { return vtbx4_u8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2 +// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX1_I]] uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) { return vqtbx1_u8(a, b, c); @@ -865,7 +865,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX2_I]] uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) { return vqtbx2_u8(a, b, c); @@ -889,7 +889,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX3_I]] uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) { return vqtbx3_u8(a, b, c); @@ -916,20 +916,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2 +// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX4_I]] uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) { return vqtbx4_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX1_I]] uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vqtbx1q_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) 
#1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -944,13 +944,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX2_I]] uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) { return vqtbx2q_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -968,13 +968,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX3_I]] uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) { return vqtbx3q_u8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 @@ -995,7 +995,7 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2 +// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX4_I]] uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) { 
return vqtbx4q_u8(a, b, c); @@ -1003,14 +1003,14 @@ // CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 { // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL11_I]] poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { return vtbl1_p8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #0 { -// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2 +// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #1 { +// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL1_I]] poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) { return vqtbl1_p8(a, b); @@ -1032,7 +1032,7 @@ // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> -// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL13_I]] poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { return vtbl2_p8(a, b); @@ -1053,7 +1053,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL2_I]] poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) { return vqtbl2_p8(a, b); @@ -1079,7 +1079,7 @@ // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL26_I]] poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { return vtbl3_p8(a, b); @@ -1103,7 +1103,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2 +// CHECK: 
[[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL3_I]] poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) { return vqtbl3_p8(a, b); @@ -1132,7 +1132,7 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2 +// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL28_I]] poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { return vtbl4_p8(a, b); @@ -1159,20 +1159,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3 // CHECK: ret <8 x i8> [[VTBL4_I]] poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) { return vqtbl4_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #0 { -// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #1 { +// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL1_I]] poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) { return vqtbl1q_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0 @@ -1187,13 +1187,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2 +// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL2_I]] poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) { return vqtbl2q_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { 
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0 @@ -1211,13 +1211,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2 +// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL3_I]] poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) { return vqtbl3q_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0 @@ -1238,7 +1238,7 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2 +// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3 // CHECK: ret <16 x i8> [[VTBL4_I]] poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) { return vqtbl4q_p8(a, b); @@ -1246,7 +1246,7 @@ // CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3 // CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a @@ -1274,7 +1274,7 @@ // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> -// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX13_I]] poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { return vtbx2_p8(a, b, c); @@ -1300,7 +1300,7 @@ // CHECK: [[TMP3:%.*]] = load 
<8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2 +// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3 // CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, // CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a @@ -1335,14 +1335,14 @@ // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> -// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2 +// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX28_I]] poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { return vtbx4_p8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2 +// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX1_I]] poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) { return vqtbx1_p8(a, b, c); @@ -1363,7 +1363,7 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX2_I]] poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) { return vqtbx2_p8(a, b, c); @@ -1387,7 +1387,7 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX3_I]] poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) { return vqtbx3_p8(a, b, c); @@ -1414,20 +1414,20 @@ // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, 
%struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 -// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2 +// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3 // CHECK: ret <8 x i8> [[VTBX4_I]] poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) { return vqtbx4_p8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { -// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2 +// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 { +// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX1_I]] poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) { return vqtbx1q_p8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -1442,13 +1442,13 @@ // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 -// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2 +// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3 // CHECK: ret <16 x i8> [[VTBX2_I]] poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) { return vqtbx2q_p8(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 { +// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 @@ -1466,13 +1466,13 @@ // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 -// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2 +// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX3_I]]
poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
  return vqtbx3q_p8(a, b, c);
}

-// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@@ -1493,8 +1493,11 @@
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
+// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX4_I]]
poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
  return vqtbx4q_p8(a, b, c);
}
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
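The renumbering in these tests follows one visible rule: definitions whose widest vector argument or return type is 64 bits stay in the attribute group checked as "min-legal-vector-width"="64", while the 128-bit q-form intrinsics move to the group checked as "min-legal-vector-width"="128". A minimal sketch of a reduced test in the same style; the file, function names, and RUN line are illustrative assumptions, not part of this patch:

// Hypothetical reduced test (sketch only; the RUN line and names below are
// assumptions, not lines from this patch).
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:   -S -emit-llvm -o - %s | FileCheck %s
#include <arm_neon.h>

// The widest vector type here is 64 bits, so this definition should stay
// in the group carrying "min-legal-vector-width"="64".
// CHECK-LABEL: define <8 x i8> @narrow(<8 x i8> %a, <8 x i8> %b) #0 {
int8x8_t narrow(int8x8_t a, int8x8_t b) { return vadd_s8(a, b); }

// The q-form works on 128-bit vectors, so it should be tagged
// "min-legal-vector-width"="128" instead.
// CHECK-LABEL: define <16 x i8> @wide(<16 x i8> %a, <16 x i8> %b) #1 {
int8x16_t wide(int8x16_t a, int8x16_t b) { return vaddq_s8(a, b); }

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"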
Index: test/CodeGen/aarch64-neon-vget.c
===================================================================
--- test/CodeGen/aarch64-neon-vget.c
+++ test/CodeGen/aarch64-neon-vget.c
@@ -97,14 +97,14 @@
  return vget_lane_f16(a, 1);
}

-// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

-// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -113,7 +113,7 @@
  return vgetq_lane_u16(a, 7);
}

-// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -122,14 +122,14 @@
  return vgetq_lane_u32(a, 3);
}

-// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

-// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -138,7 +138,7 @@
  return vgetq_lane_s16(a, 7);
}

-// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -147,14 +147,14 @@
  return vgetq_lane_s32(a, 3);
}

-// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

-// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -163,7 +163,7 @@
  return vgetq_lane_p16(a, 7);
}

-// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -172,7 +172,7 @@
  return vgetq_lane_f32(a, 3);
}

-// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
+// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
@@ -208,7 +208,7 @@
  return vget_lane_u64(a, 0);
}

-// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -217,7 +217,7 @@
  return vgetq_lane_s64(a, 1);
}

-// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -324,14 +324,14 @@
  return vset_lane_f16(*a, b, 3);
}

-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -340,7 +340,7 @@
  return vsetq_lane_u16(a, b, 7);
}

-// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@@ -349,14 +349,14 @@
  return vsetq_lane_u32(a, b, 3);
}

-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -365,7 +365,7 @@
  return vsetq_lane_s16(a, b, 7);
}

-// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@@ -374,14 +374,14 @@
  return vsetq_lane_s32(a, b, 3);
}

-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -390,7 +390,7 @@
  return vsetq_lane_p16(a, b, 7);
}

-// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
@@ -399,7 +399,7 @@
  return vsetq_lane_f32(a, b, 3);
}

-// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
+// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
@@ -439,7 +439,7 @@
  return vset_lane_u64(a, b, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -448,7 +448,7 @@
  return vsetq_lane_s64(a, b, 1);
}

-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -456,3 +456,6 @@
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
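Worth noting from the file above: the vgetq_lane tests return plain scalars, yet they still move to the 128-bit group, because the 128-bit vector parameter alone is enough to raise the tracked width. A reduced illustration under the same assumptions as the earlier sketch (hypothetical name and RUN line):

// Hypothetical sketch; the name and RUN line are assumptions.
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:   -S -emit-llvm -o - %s | FileCheck %s
#include <arm_neon.h>

// The result is a scalar i8, but the <16 x i8> argument is 128 bits wide,
// so the definition should still carry "min-legal-vector-width"="128".
// CHECK-LABEL: define i8 @last_lane(<16 x i8> %v) #0 {
uint8_t last_lane(uint8x16_t v) {
  return vgetq_lane_u8(v, 15);
}

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"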
Index: test/CodeGen/aarch64-poly64.c
===================================================================
--- test/CodeGen/aarch64-poly64.c
+++ test/CodeGen/aarch64-poly64.c
@@ -14,7 +14,7 @@
  return vceq_p64(a, b);
}

-// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
@@ -31,7 +31,7 @@
  return vtst_p64(a, b);
}

-// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
@@ -50,7 +50,7 @@
  return vbsl_p64(a, b, c);
}

-// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #1 {
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
@@ -69,7 +69,7 @@
  return vget_lane_p64(v, 0);
}

-// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -87,7 +87,7 @@
  return vset_lane_p64(a, v, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -109,7 +109,7 @@
}

-// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
@@ -121,7 +121,7 @@
  return vcopyq_lane_p64(a, 1, b, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -146,7 +146,7 @@
poly64x1_t test_vdup_n_p64(poly64_t a) {
  return vdup_n_p64(a);
}

-// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
@@ -161,7 +161,7 @@
  return vmov_n_p64(a);
}

-// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
@@ -176,21 +176,21 @@
  return vdup_lane_p64(vec, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
  return vdupq_lane_p64(vec, 0);
}

-// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
  return vdupq_laneq_p64(vec, 1);
}

-// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
@@ -206,7 +206,7 @@
  return vld1_p64(ptr);
}

-// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
@@ -226,7 +226,7 @@
  return vst1_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #0 {
+// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
@@ -237,7 +237,7 @@
  return vst1q_p64(ptr, val);
}

-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@@ -255,7 +255,7 @@
  return vld2_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@@ -273,7 +273,7 @@
  return vld2q_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
@@ -291,7 +291,7 @@
  return vld3_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
@@ -309,7 +309,7 @@
  return vld3q_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
@@ -327,7 +327,7 @@
  return vld4_p64(ptr);
}

-// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
@@ -345,7 +345,7 @@
  return vld4q_p64(ptr);
}

-// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
@@ -370,7 +370,7 @@
  return vst2_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
@@ -395,7 +395,7 @@
  return vst2q_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
@@ -425,7 +425,7 @@
  return vst3_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
@@ -455,7 +455,7 @@
  return vst3q_p64(ptr, val);
}

-// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #0 {
%ptr, [4 x <1 x i64>] %val.coerce) #0 { +// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #2 { // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0 @@ -490,7 +490,7 @@ return vst4_p64(ptr, val); } -// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #0 { +// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #2 { // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0 @@ -537,7 +537,7 @@ } -// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -548,42 +548,42 @@ return vextq_p64(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) { return vzip1q_p64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) { return vzip2q_u64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) { return vuzp1q_p64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) { return vuzp2q_u64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) { return vtrn1q_p64(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) { @@ -601,7 +601,7 @@ return vsri_n_p64(a, b, 33); } -// CHECK-LABEL: define <2 x 
i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #1 { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -612,3 +612,6 @@ return vsriq_n_p64(a, b, 64); } +// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" +// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" +// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width" Index: test/CodeGen/arm-neon-fma.c =================================================================== --- test/CodeGen/arm-neon-fma.c +++ test/CodeGen/arm-neon-fma.c @@ -8,14 +8,14 @@ #include // CHECK-LABEL: define <2 x float> @test_fma_order(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) #0 { -// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #2 +// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #3 // CHECK: ret <2 x float> [[TMP6]] float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) { return vfma_f32(accum, lhs, rhs); } -// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #0 { -// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #2 +// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #1 { +// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #3 // CHECK: ret <4 x float> [[TMP6]] float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) { return vfmaq_f32(accum, lhs, rhs); @@ -32,7 +32,7 @@ return vfma_n_f32(a, b, n); } -// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { +// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #1 { // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2 @@ -44,3 +44,6 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { return vfmaq_n_f32(a, b, n); } + +// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" +// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" Index: test/CodeGen/arm-neon-numeric-maxmin.c =================================================================== --- test/CodeGen/arm-neon-numeric-maxmin.c +++ test/CodeGen/arm-neon-numeric-maxmin.c @@ -3,29 +3,32 @@ #include // CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 { -// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #2 +// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #3 // CHECK: ret <2 x float> [[VMAXNM_V2_I]] float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { return vmaxnm_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { -// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #2 +// CHECK-LABEL: define <4 x float> 
@test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #1 { +// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #3 // CHECK: ret <4 x float> [[VMAXNMQ_V2_I]] float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { return vmaxnmq_f32(a, b); } // CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 { -// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #2 +// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #3 // CHECK: ret <2 x float> [[VMINNM_V2_I]] float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { return vminnm_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { -// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #2 +// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #1 { +// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #3 // CHECK: ret <4 x float> [[VMINNMQ_V2_I]] float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { return vminnmq_f32(a, b); } + +// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" +// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" Index: test/CodeGen/arm-neon-vcvtX.c =================================================================== --- test/CodeGen/arm-neon-vcvtX.c +++ test/CodeGen/arm-neon-vcvtX.c @@ -3,113 +3,116 @@ #include // CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 { -// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #3 // CHECK: ret <2 x i32> [[VCVTA_S32_V1_I]] int32x2_t test_vcvta_s32_f32(float32x2_t a) { return vcvta_s32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 { -// CHECK: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #3 // CHECK: ret <2 x i32> [[VCVTA_U32_V1_I]] uint32x2_t test_vcvta_u32_f32(float32x2_t a) { return vcvta_u32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 { -// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #2 +// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #1 { +// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #3 // CHECK: ret <4 x i32> [[VCVTAQ_S32_V1_I]] int32x4_t test_vcvtaq_s32_f32(float32x4_t a) { return vcvtaq_s32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 { -// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #2 +// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #1 { +// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #3 // CHECK: ret <4 x i32> [[VCVTAQ_U32_V1_I]] uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) { return vcvtaq_u32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 { -// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #2 
+// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTN_S32_V1_I]]
 int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
   return vcvtn_s32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTN_U32_V1_I]]
 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
   return vcvtn_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTNQ_S32_V1_I]]
 int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
   return vcvtnq_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTNQ_U32_V1_I]]
 uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
   return vcvtnq_u32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTP_S32_V1_I]]
 int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
   return vcvtp_s32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTP_U32_V1_I]]
 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
   return vcvtp_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTPQ_S32_V1_I]]
 int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
   return vcvtpq_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTPQ_U32_V1_I]]
 uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
   return vcvtpq_u32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTM_S32_V1_I]]
 int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
   return vcvtm_s32_f32(a);
 }

 // CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
-// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK: ret <2 x i32> [[VCVTM_U32_V1_I]]
 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
   return vcvtm_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTMQ_S32_V1_I]]
 int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
   return vcvtmq_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
-// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #1 {
+// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK: ret <4 x i32> [[VCVTMQ_U32_V1_I]]
 uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
   return vcvtmq_u32_f32(a);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
Index: test/CodeGen/arm64_vdupq_n_f64.c
===================================================================
--- test/CodeGen/arm64_vdupq_n_f64.c
+++ test/CodeGen/arm64_vdupq_n_f64.c
@@ -44,7 +44,7 @@
   return vmovq_n_f64(w);
 }

-// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #0 {
+// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #1 {
 // CHECK: [[TMP0:%.*]] = load half, half* %a1, align 2
 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
@@ -76,3 +76,5 @@
   return vmovq_n_f16(*a1);
 }

+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"
Index: test/CodeGen/x86-vector-width.c
===================================================================
--- /dev/null
+++ test/CodeGen/x86-vector-width.c
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s
+
+typedef signed long long V2LLi __attribute__((vector_size(16)));
+typedef signed long long V4LLi __attribute__((vector_size(32)));
+
+V2LLi ret_128();
+V4LLi ret_256();
+void arg_128(V2LLi);
+void arg_256(V4LLi);
+
+// Make sure return type forces a min-legal-width
+V2LLi foo(void) {
+  return (V2LLi){ 0, 0 };
+}
+
+V4LLi goo(void) {
+  return (V4LLi){ 0, 0 };
+}
+
+// Make sure return type of called function forces a min-legal-width
+void hoo(void) {
+  V2LLi tmp_V2LLi;
+  tmp_V2LLi = ret_128();
+}
+
+void joo(void) {
+  V4LLi tmp_V4LLi;
+  tmp_V4LLi = ret_256();
+}
+
+// Make sure arg type of called function forces a min-legal-width
+void koo(void) {
+  V2LLi tmp_V2LLi;
+  arg_128(tmp_V2LLi);
+}
+
+void loo(void) {
+  V4LLi tmp_V4LLi;
+  arg_256(tmp_V4LLi);
+}
+
+// Make sure arg type of our function forces a min-legal-width
+void moo(V2LLi x) {
+
+}
+
+void noo(V4LLi x) {
+
+}
+
+// CHECK: {{.*}}@foo{{.*}} #0
+// CHECK: {{.*}}@goo{{.*}} #1
+// CHECK: {{.*}}@hoo{{.*}} #0
+// CHECK: {{.*}}@joo{{.*}} #1
+// CHECK: {{.*}}@koo{{.*}} #0
+// CHECK: {{.*}}@loo{{.*}} #1
+// CHECK: {{.*}}@moo{{.*}} #0
+// CHECK: {{.*}}@noo{{.*}} #1
+
+// CHECK: #0 = {{.*}}"min-legal-vector-width"="128"
+// CHECK: #1 = {{.*}}"min-legal-vector-width"="256"
Index: test/CodeGenOpenCL/fpmath.cl
===================================================================
--- test/CodeGenOpenCL/fpmath.cl
+++ test/CodeGenOpenCL/fpmath.cl
@@ -16,7 +16,7 @@

 float4 spvectordiv(float4 a, float4 b) {
   // CHECK: @spvectordiv
-  // CHECK: #[[ATTR]]
+  // CHECK: #[[ATTR2:[0-9]+]]
   // CHECK: fdiv{{.*}},
   // NODIVOPT: !fpmath ![[MD]]
   // DIVOPT-NOT: !fpmath ![[MD]]
@@ -45,7 +45,11 @@
 #endif

 // CHECK: attributes #[[ATTR]] = {
-// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false"
-// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true"
-// CHECK: }
+// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK-SAME: }
+// CHECK: attributes #[[ATTR2]] = {
+// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK-SAME: }
 // NODIVOPT: ![[MD]] = !{float 2.500000e+00}