Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -497,7 +497,8 @@ // 4. Width of vector arguments and return types for this function. // 5. Width of vector aguments and return types for functions called by this // function. - CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); + if (LargestVectorWidth != 0) + CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); // Add vscale_range attribute if appropriate. Optional<std::pair<unsigned, unsigned>> VScaleRange = Index: clang/test/CodeGen/aarch64-neon-ldst-one.c =================================================================== --- clang/test/CodeGen/aarch64-neon-ldst-one.c +++ clang/test/CodeGen/aarch64-neon-ldst-one.c @@ -6802,4 +6802,4 @@ // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64" -// CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0" +// CHECK-NOT: "min-legal-vector-width"="0" Index: clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c =================================================================== --- clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -47,7 +47,7 @@ return vmul_n_f64(a, b); } -// CHECK-LABEL: define{{.*}} float @test_vmulxs_lane_f32(float noundef %a, <2 x float> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} float @test_vmulxs_lane_f32(float noundef %a, <2 x float> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] @@ -55,7 +55,7 @@ return vmulxs_lane_f32(a, b, 1); } -// CHECK-LABEL: define{{.*}} float @test_vmulxs_laneq_f32(float noundef %a, <4 x float> noundef %b) #1 { +// CHECK-LABEL: 
define{{.*}} float @test_vmulxs_laneq_f32(float noundef %a, <4 x float> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] @@ -63,7 +63,7 @@ return vmulxs_laneq_f32(a, b, 3); } -// CHECK-LABEL: define{{.*}} double @test_vmulxd_lane_f64(double noundef %a, <1 x double> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} double @test_vmulxd_lane_f64(double noundef %a, <1 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] @@ -71,7 +71,7 @@ return vmulxd_lane_f64(a, b, 0); } -// CHECK-LABEL: define{{.*}} double @test_vmulxd_laneq_f64(double noundef %a, <2 x double> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} double @test_vmulxd_laneq_f64(double noundef %a, <2 x double> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] @@ -79,7 +79,7 @@ return vmulxd_laneq_f64(a, b, 1); } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 // CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]]) @@ -90,7 +90,7 @@ } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_0(<1 x double> noundef %a, <2 x double> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} <1 x 
double> @test_vmulx_laneq_f64_0(<1 x double> noundef %a, <2 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) @@ -100,7 +100,7 @@ return vmulx_laneq_f64(a, b, 0); } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_1(<1 x double> noundef %a, <2 x double> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_1(<1 x double> noundef %a, <2 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) @@ -204,7 +204,7 @@ return vfms_laneq_f64(a, b, v, 0); } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 @@ -215,7 +215,7 @@ return vqdmullh_lane_s16(a, b, 3); } -// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] @@ -223,7 +223,7 @@ return vqdmulls_lane_s32(a, b, 1); } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #1 { 
+// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 @@ -234,7 +234,7 @@ return vqdmullh_laneq_s16(a, b, 7); } -// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] @@ -242,7 +242,7 @@ return vqdmulls_laneq_s32(a, b, 3); } -// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 @@ -253,7 +253,7 @@ return vqdmulhh_lane_s16(a, b, 3); } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] @@ -262,7 +262,7 @@ } -// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = 
extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 @@ -274,7 +274,7 @@ } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] @@ -282,7 +282,7 @@ return vqdmulhs_laneq_s32(a, b, 3); } -// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 @@ -293,7 +293,7 @@ return vqrdmulhh_lane_s16(a, b, 3); } -// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] @@ -302,7 +302,7 @@ } -// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 
[[VGETQ_LANE]], i64 0 @@ -314,7 +314,7 @@ } -// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] @@ -406,7 +406,7 @@ return vqdmlsls_laneq_s32(a, b, c, 3); } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64_0() #0 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64_0() #2 { // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 @@ -425,7 +425,7 @@ return result; } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_2() #1 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_2() #2 { // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> Index: clang/test/CodeGen/aarch64-poly128.c =================================================================== --- clang/test/CodeGen/aarch64-poly128.c +++ clang/test/CodeGen/aarch64-poly128.c @@ -28,8 +28,8 @@ // CHECK-LABEL: define {{[^@]+}}@test_vldrq_p128 // CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[PTR]], align 16 -// CHECK-NEXT: ret i128 [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[PTR]], align 16 +// CHECK-NEXT: ret i128 [[TMP0]] // poly128_t test_vldrq_p128(poly128_t * ptr) { return vldrq_p128(ptr); @@ -39,9 +39,9 @@ // CHECK-LABEL: define {{[^@]+}}@test_ld_st_p128 // 
CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[PTR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[PTR]], align 16 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i128, ptr [[PTR]], i64 1 -// CHECK-NEXT: store i128 [[TMP2]], ptr [[ADD_PTR]], align 16 +// CHECK-NEXT: store i128 [[TMP0]], ptr [[ADD_PTR]], align 16 // CHECK-NEXT: ret void // void test_ld_st_p128(poly128_t * ptr) { @@ -61,7 +61,7 @@ } // CHECK-LABEL: define {{[^@]+}}@test_vmull_high_p64 -// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[SHUFFLE_I5:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I5]] to i64 @@ -76,7 +76,7 @@ } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s8 -// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] Index: clang/test/CodeGen/aarch64-poly64.c =================================================================== --- clang/test/CodeGen/aarch64-poly64.c +++ clang/test/CodeGen/aarch64-poly64.c @@ -538,4 +538,4 @@ // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" -// CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0" +// CHECK-NOT: "min-legal-vector-width"="0" Index: clang/test/CodeGen/regcall2.c =================================================================== --- clang/test/CodeGen/regcall2.c +++ clang/test/CodeGen/regcall2.c @@ -21,7 +21,7 @@ // FIXME: Do we need to change for Windows? 
// Win: define dso_local x86_regcallcc void @__regcall3__foo(ptr noalias sret(%struct.__sVector) align 64 %agg.result, i32 noundef %a) #0 // Win: define dso_local x86_regcallcc double @__regcall3__bar(ptr noundef %a) #0 -// Win: attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } +// Win: attributes #0 = { noinline nounwind optnone "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } // Lin: define dso_local x86_regcallcc %struct.__sVector @__regcall3__foo(i32 noundef %a) #0 // Lin: define dso_local x86_regcallcc double @__regcall3__bar([4 x <8 x double>] %a.coerce0, [4 x <16 x float>] %a.coerce1) #0 Index: clang/test/CodeGenCXX/arm-generated-fn-attr.cpp =================================================================== --- clang/test/CodeGenCXX/arm-generated-fn-attr.cpp +++ clang/test/CodeGenCXX/arm-generated-fn-attr.cpp @@ -23,14 +23,14 @@ // CHECK: define {{.*}} @__cxx_global_var_init() [[ATTR1:#[0-9]+]] // CHECK: define {{.*}} @__clang_call_terminate({{.*}}) [[ATTR2:#[0-9]+]] -// CHECK: define {{.*}} @_ZTW4var1() [[ATTR3:#[0-9]+]] -// CHECK: define {{.*}} @_ZTW4var2() [[ATTR3]] +// CHECK: define {{.*}} @_ZTW4var1() [[ATTR1]] +// CHECK: define {{.*}} @_ZTW4var2() [[ATTR1]] // CHECK: define {{.*}} @__tls_init() [[ATTR1]] -// CHECK-PACBTI: attributes [[ATTR1]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode"{{.*}} } -// CHECK-PACBTI: attributes [[ATTR2]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode"{{.*}} } -// CHECK-PACBTI: attributes [[ATTR3]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode"{{.*}} } + +// CHECK-PACBTI: 
attributes [[ATTR1]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode" } +// CHECK-PACBTI: attributes [[ATTR2]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode" } + // CHECK-NOPACBTI: attributes [[ATTR1]] = { {{.*}}"target-features"="+armv8.1-m.main,+thumb-mode,-pacbti"{{.*}} } // CHECK-NOPACBTI: attributes [[ATTR2]] = { {{.*}}"target-features"="+armv8.1-m.main,+thumb-mode,-pacbti"{{.*}} } -// CHECK-NOPACBTI: attributes [[ATTR3]] = { {{.*}}"target-features"="+armv8.1-m.main,+thumb-mode,-pacbti"{{.*}} } Index: clang/test/CodeGenCXX/dllexport-ctor-closure-nested.cpp =================================================================== --- clang/test/CodeGenCXX/dllexport-ctor-closure-nested.cpp +++ clang/test/CodeGenCXX/dllexport-ctor-closure-nested.cpp @@ -17,4 +17,4 @@ }; // CHECK-LABEL: $"??1HasImplicitDtor1@@QAE@XZ" = comdat any -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorClosureInner@CtorClosureOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorClosureInner@CtorClosureOuter@@QAEXXZ"({{.*}}) comdat Index: clang/test/CodeGenCXX/dllexport-ctor-closure.cpp =================================================================== --- clang/test/CodeGenCXX/dllexport-ctor-closure.cpp +++ clang/test/CodeGenCXX/dllexport-ctor-closure.cpp @@ -5,7 +5,7 @@ struct CtorWithClosure { __declspec(dllexport) CtorWithClosure(...) 
{} -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosure@@QAEXXZ"({{.*}}) comdat // CHECK: %[[this_addr:.*]] = alloca ptr, align 4 // CHECK: store ptr %this, ptr %[[this_addr]], align 4 // CHECK: %[[this:.*]] = load ptr, ptr %[[this_addr]] @@ -17,7 +17,7 @@ __declspec(dllexport) CtorWithClosureOutOfLine(...); }; CtorWithClosureOutOfLine::CtorWithClosureOutOfLine(...) {} -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) comdat #define DELETE_IMPLICIT_MEMBERS(ClassName) \ ClassName(ClassName &&) = delete; \ @@ -28,7 +28,7 @@ struct __declspec(dllexport) ClassWithClosure { DELETE_IMPLICIT_MEMBERS(ClassWithClosure); ClassWithClosure(...) 
{} -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FClassWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FClassWithClosure@@QAEXXZ"({{.*}}) comdat // CHECK: %[[this_addr:.*]] = alloca ptr, align 4 // CHECK: store ptr %this, ptr %[[this_addr]], align 4 // CHECK: %[[this:.*]] = load ptr, ptr %[[this_addr]] @@ -44,10 +44,10 @@ extern template struct TemplateWithClosure; template struct __declspec(dllexport) TemplateWithClosure; -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) comdat // CHECK: call {{.*}} @"??0?$TemplateWithClosure@D@@QAE@H@Z"({{.*}}, i32 noundef 1) -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) comdat // CHECK: call {{.*}} @"??0?$TemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 noundef 4) template struct __declspec(dllexport) ExportedTemplateWithClosure { @@ -55,7 +55,7 @@ }; template <> ExportedTemplateWithClosure::ExportedTemplateWithClosure(int); // Don't try to emit the closure for a declaration. 
template <> ExportedTemplateWithClosure::ExportedTemplateWithClosure(int) {}; -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$ExportedTemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$ExportedTemplateWithClosure@H@@QAEXXZ"({{.*}}) comdat // CHECK: call {{.*}} @"??0?$ExportedTemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 noundef 4) struct __declspec(dllexport) NestedOuter { @@ -67,8 +67,8 @@ }; }; -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedOuter@@QAEXXZ"({{.*}}) comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) comdat struct HasDtor { ~HasDtor(); Index: clang/test/CodeGenCXX/dllexport.cpp =================================================================== --- clang/test/CodeGenCXX/dllexport.cpp +++ clang/test/CodeGenCXX/dllexport.cpp @@ -535,7 +535,7 @@ // MSVC2013-DAG: define weak_odr dso_local dllexport {{.+}} @"??4?$SomeTemplate@H@@Q{{.+}}0@A{{.+}}0@@Z" struct __declspec(dllexport) InheritFromTemplate : SomeTemplate {}; -// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) comdat namespace PR23801 { template @@ -552,7 +552,7 @@ } // -// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FB@PR23801@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FB@PR23801@@QAEXXZ"({{.*}}) comdat struct __declspec(dllexport) T 
{ // Copy assignment operator: Index: clang/test/OpenMP/amdgcn-attributes.cpp =================================================================== --- clang/test/OpenMP/amdgcn-attributes.cpp +++ clang/test/OpenMP/amdgcn-attributes.cpp @@ -32,12 +32,12 @@ return x + 1; } -// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" } -// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "kernel" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } - -// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } -// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "min-legal-vector-width"="0" "no-nans-fp-math"="true" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" } +// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } + +// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } +// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" } Index: clang/test/OpenMP/irbuilder_safelen.cpp =================================================================== --- clang/test/OpenMP/irbuilder_safelen.cpp +++ clang/test/OpenMP/irbuilder_safelen.cpp @@ -123,8 +123,8 @@ } } //. -// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} Index: clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp =================================================================== --- clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp +++ clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp @@ -123,8 +123,8 @@ } } //. 
-// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 50} Index: clang/test/OpenMP/irbuilder_simd_aligned.cpp =================================================================== --- clang/test/OpenMP/irbuilder_simd_aligned.cpp +++ clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -162,8 +162,8 @@ } } //. -// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } // CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } //. 
// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} Index: clang/test/OpenMP/irbuilder_simdlen.cpp =================================================================== --- clang/test/OpenMP/irbuilder_simdlen.cpp +++ clang/test/OpenMP/irbuilder_simdlen.cpp @@ -123,8 +123,8 @@ } } //. -// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} Index: clang/test/OpenMP/irbuilder_simdlen_safelen.cpp =================================================================== --- clang/test/OpenMP/irbuilder_simdlen_safelen.cpp +++ clang/test/OpenMP/irbuilder_simdlen_safelen.cpp @@ -123,8 +123,8 @@ } } //. 
-// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} Index: llvm/lib/Target/X86/X86TargetMachine.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetMachine.cpp +++ llvm/lib/Target/X86/X86TargetMachine.cpp @@ -284,8 +284,12 @@ } // Extract min-legal-vector-width attribute. - unsigned RequiredVectorWidth = UINT32_MAX; + unsigned RequiredVectorWidth = 0; Attribute MinLegalVecWidthAttr = F.getFnAttribute("min-legal-vector-width"); + + // FIXME: The point the subtarget is constructed is not well defined. The + // attribute propagation passes may modify the attribute later, so you may get + // a different subtarget at different points in the pipeline. if (MinLegalVecWidthAttr.isValid()) { StringRef Val = MinLegalVecWidthAttr.getValueAsString(); unsigned Width; @@ -294,6 +298,37 @@ Key += Val; RequiredVectorWidth = Width; } + } else { + // FIXME: This reduction over vector return/argument vector size is + // effectively repeated in at least 5 places. If there should be an + // interaction between the argument types and the explicit + // min-legal-vector-width, it should be consistently applied in one location + // which is not the x86 subtarget constructor. 
A more reasonable approach + would be to have attribute inference account for this, and treat an + unannotated function consistently as 0 (but this requires test updates) + (also if inference needs to account for explicit user attributes and the + IR types, a separate x86 prefixed attribute would be better). + unsigned LargestVectorWidth = 0; + for (const Argument &A : F.args()) { + if (auto *VT = dyn_cast<VectorType>(A.getType())) { + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); + } + } + + // Update vector width based on return type. + if (auto *VT = dyn_cast<VectorType>(F.getReturnType())) { + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); + } + + if (LargestVectorWidth != 0) { + RequiredVectorWidth = LargestVectorWidth; + Key += 'm'; + Key += llvm::utostr(RequiredVectorWidth); + } } // Add CPU to the Key. Index: llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll +++ llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll @@ -18,7 +18,7 @@ ; } ; (relates to the testcase in PR50566) -define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test1' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -75,7 +75,7 @@ ; y[i] = points[i*4 + 1]; ; } -define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias
nocapture readonly %y) { +define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test2' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -142,7 +142,7 @@ ; x[i] = points[i*3]; ; } -define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { +define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 @@ -192,3 +192,5 @@ for.end: ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll +++ llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll @@ -18,7 +18,7 @@ ; } ; (relates to the testcase in PR50566) -define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test1' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: 
store i16 %2, i16* %arrayidx7, align 2 @@ -75,7 +75,7 @@ ; points[i*4 + 1] = y[i]; ; } -define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test2' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 @@ -142,7 +142,7 @@ ; points[i*3] = x[i]; ; } -define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { +define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 @@ -181,3 +181,5 @@ for.end: ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -8,7 +8,7 @@ ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX -define i32 @masked_load() { +define i32 @masked_load() #0 { ; 
SSE2-LABEL: 'masked_load' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef) @@ -369,7 +369,7 @@ ret i32 0 } -define i32 @masked_store() { +define i32 @masked_store() #0 { ; SSE2-LABEL: 'masked_store' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef) @@ -730,7 +730,7 @@ ret i32 0 } -define i32 @masked_gather() { +define i32 @masked_gather() #0 { ; SSE2-LABEL: 'masked_gather' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) @@ -953,7 +953,7 @@ ret i32 0 } -define i32 @masked_scatter() { +define i32 @masked_scatter() #0 { ; SSE2-LABEL: 'masked_scatter' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) @@ -1122,7 +1122,7 @@ ret i32 0 } -define i32 @masked_expandload() { +define i32 
@masked_expandload() #0 { ; SSE2-LABEL: 'masked_expandload' ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) @@ -1264,7 +1264,7 @@ ret i32 0 } -define i32 @masked_compressstore() { +define i32 @masked_compressstore() #0 { ; SSE2-LABEL: 'masked_compressstore' ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) @@ -1460,7 +1460,7 @@ ret i32 0 } -define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) { +define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) #0 { ; SSE2-LABEL: 'test1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) @@ -1486,7 +1486,7 @@ ret <2 x double> %res } -define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { +define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) #0 { ; SSE2-LABEL: 'test2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) @@ 
-1512,7 +1512,7 @@ ret <4 x i32> %res } -define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) { +define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) #0 { ; SSE2-LABEL: 'test3' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) @@ -1538,7 +1538,7 @@ ret void } -define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { +define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) #0 { ; SSE2-LABEL: 'test4' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) @@ -1574,7 +1574,7 @@ ret <8 x float> %res } -define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { +define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) #0 { ; SSE2-LABEL: 'test5' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) @@ -1600,7 +1600,7 @@ ret void } -define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { +define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) #0 { ; SSE2-LABEL: 'test6' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void 
@llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) @@ -1626,7 +1626,7 @@ ret void } -define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { +define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) #0 { ; SSE2-LABEL: 'test7' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) @@ -1652,7 +1652,7 @@ ret <2 x float> %res } -define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { +define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) #0 { ; SSE2-LABEL: 'test8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) @@ -1773,7 +1773,7 @@ ret <4 x i32> %res } -define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1817,7 +1817,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1861,7 +1861,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind @@ -1905,7 +1905,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer @@ -1964,7 +1964,7 @@ ret <16 x float>%res } -define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) #0 { ; SSE2-LABEL: 'test_scatter_16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> 
zeroinitializer @@ -2022,7 +2022,7 @@ ret void } -define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { +define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2043,7 +2043,7 @@ ret void } -define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { +define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2068,7 +2068,7 @@ ret void } -define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) { +define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2118,7 +2118,7 @@ ret <4 x float>%res } -define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { +define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2411,3 +2411,5 @@ declare 
void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) declare void @llvm.masked.compressstore.v8i8(<8 x i8>, i8*, <8 x i1>) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -8,7 +8,7 @@ ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX -define i32 @masked_load() { +define i32 @masked_load() #0 { ; SSE2-LABEL: 'masked_load' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef) @@ -369,7 +369,7 @@ ret i32 0 } -define i32 @masked_store() { +define i32 @masked_store() #0 { ; SSE2-LABEL: 'masked_store' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef) @@ -730,7 +730,7 @@ ret i32 0 } -define i32 @masked_gather() { +define i32 @masked_gather() #0 { ; SSE2-LABEL: 'masked_gather' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> 
@llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) @@ -953,7 +953,7 @@ ret i32 0 } -define i32 @masked_scatter() { +define i32 @masked_scatter() #0 { ; SSE2-LABEL: 'masked_scatter' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) @@ -1122,7 +1122,7 @@ ret i32 0 } -define i32 @masked_expandload() { +define i32 @masked_expandload() #0 { ; SSE2-LABEL: 'masked_expandload' ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) @@ -1264,7 +1264,7 @@ ret i32 0 } -define i32 @masked_compressstore() { +define i32 @masked_compressstore() #0 { ; SSE2-LABEL: 'masked_compressstore' ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) @@ -1460,7 +1460,7 @@ ret i32 0 } -define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) { +define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) #0 { ; SSE2-LABEL: 
'test1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) @@ -1486,7 +1486,7 @@ ret <2 x double> %res } -define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { +define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) #0 { ; SSE2-LABEL: 'test2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) @@ -1512,7 +1512,7 @@ ret <4 x i32> %res } -define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) { +define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) #0 { ; SSE2-LABEL: 'test3' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) @@ -1538,7 +1538,7 @@ ret void } -define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { +define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) #0 { ; SSE2-LABEL: 'test4' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) @@ -1574,7 +1574,7 @@ ret <8 x float> %res } -define void @test5(<2 x i32> 
%trigger, <2 x float>* %addr, <2 x float> %val) { +define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) #0 { ; SSE2-LABEL: 'test5' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) @@ -1600,7 +1600,7 @@ ret void } -define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { +define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) #0 { ; SSE2-LABEL: 'test6' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) @@ -1626,7 +1626,7 @@ ret void } -define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { +define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) #0 { ; SSE2-LABEL: 'test7' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) @@ -1652,7 +1652,7 @@ ret <2 x float> %res } -define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { +define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) #0 { ; SSE2-LABEL: 'test8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, 
i32 4, <2 x i1> %mask, <2 x i32> %dst) @@ -1773,7 +1773,7 @@ ret <4 x i32> %res } -define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1817,7 +1817,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1861,7 +1861,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind @@ -1905,7 +1905,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer @@ -1964,7 +1964,7 @@ ret <16 x float>%res } -define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) #0 { ; SSE2-LABEL: 'test_scatter_16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer @@ -2022,7 +2022,7 @@ ret void } -define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { +define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2043,7 +2043,7 @@ ret void } -define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { +define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2068,7 +2068,7 @@ ret void } -define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) { +define <4 x float> 
@test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2118,7 +2118,7 @@ ret <4 x float>%res } -define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { +define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2411,3 +2411,5 @@ declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) declare void @llvm.masked.compressstore.v8i8(<8 x i8>, i8*, <8 x i1>) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/powi.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/powi.ll +++ llvm/test/Analysis/CostModel/X86/powi.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -mtriple=x86_64-linux-gnu -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -mtriple=x86_64-linux-gnu -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 -define i32 @powi_var(i32 %arg) { +define i32 @powi_var(i32 %arg) #0 { ; SSE-LABEL: 'powi_var' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 %arg) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x 
float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 %arg) @@ -72,7 +72,7 @@ ret i32 poison } -define i32 @powi_3() { +define i32 @powi_3() #0 { ; SSE-LABEL: 'powi_3' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) @@ -140,7 +140,7 @@ ret i32 poison } -define i32 @powi_n3() { +define i32 @powi_n3() #0 { ; SSE-LABEL: 'powi_n3' ; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) ; SSE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) @@ -208,7 +208,7 @@ ret i32 poison } -define i32 @powi_6() { +define i32 @powi_6() #0 { ; SSE-LABEL: 'powi_6' ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 6) @@ -276,7 +276,7 @@ ret i32 poison } -define i32 @powi_16() { +define i32 @powi_16() #0 { ; SSE-LABEL: 'powi_16' ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) @@ -355,3 +355,5 @@ declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32) declare <8 x double> @llvm.powi.v8f64(<8 x double>, i32) declare <16 x double> @llvm.powi.v16f64(<16 x double>, i32) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- 
llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx -fast-isel | FileCheck %s --check-prefix=FASTISEL -define <16 x i1> @test1() { +define <16 x i1> @test1() #0 { ; ALL_X64-LABEL: test1: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 @@ -22,7 +22,7 @@ ret <16 x i1> zeroinitializer } -define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) { +define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) #0 { ; ALL_X64-LABEL: test2: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -46,7 +46,7 @@ ret <16 x i1> %c } -define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) { +define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) #0 { ; ALL_X64-LABEL: test3: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -70,7 +70,7 @@ ret <8 x i1> %c } -define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) { +define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) #0 { ; ALL_X64-LABEL: test4: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -96,7 +96,7 @@ declare <8 x i1> @func8xi1(<8 x i1> %a) -define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) { +define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) #0 { ; KNL-LABEL: test5: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rax @@ -160,7 +160,7 @@ declare <16 x i1> @func16xi1(<16 x i1> %a) -define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) { +define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) #0 { ; KNL-LABEL: test6: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rax @@ -224,7 +224,7 @@ declare <4 x i1> @func4xi1(<4 x i1> %a) -define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) { +define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) #0 { ; ALL_X64-LABEL: test7: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: pushq %rax @@ -265,7 +265,7 @@ ret <4 x i32> %res } -define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) { +define <8 x i1> @test7a(<8 x i32>%a, <8 
x i32>%b) #0 { ; KNL-LABEL: test7a: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rax @@ -319,7 +319,7 @@ ret <8 x i1> %res } -define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) { +define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) #0 { ; ALL_X64-LABEL: test8: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: testb $1, %dil @@ -350,7 +350,7 @@ ret <16 x i8> %res } -define i1 @test9(double %a, double %b) { +define i1 @test9(double %a, double %b) #0 { ; ALL_X64-LABEL: test9: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vucomisd %xmm0, %xmm1 @@ -373,7 +373,7 @@ ret i1 %c } -define i32 @test10(i32 %a, i32 %b, i1 %cond) { +define i32 @test10(i32 %a, i32 %b, i1 %cond) #0 { ; ALL_X64-LABEL: test10: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: movl %edi, %eax @@ -400,7 +400,7 @@ ret i32 %c } -define i1 @test11(i32 %a, i32 %b) { +define i1 @test11(i32 %a, i32 %b) #0 { ; ALL_X64-LABEL: test11: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: cmpl %esi, %edi @@ -423,7 +423,7 @@ ret i1 %c } -define i32 @test12(i32 %a1, i32 %a2, i32 %b1) { +define i32 @test12(i32 %a1, i32 %a2, i32 %b1) #0 { ; ALL_X64-LABEL: test12: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: pushq %rbp @@ -519,7 +519,7 @@ ret i32 %res1 } -define <1 x i1> @test13(ptr %foo) { +define <1 x i1> @test13(ptr %foo) #0 { ; KNL-LABEL: test13: ; KNL: ## %bb.0: ; KNL-NEXT: movzbl (%rdi), %eax @@ -550,7 +550,7 @@ ret <1 x i1> %bar } -define void @test14(ptr %x) { +define void @test14(ptr %x) #0 { ; KNL-LABEL: test14: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rbx @@ -610,7 +610,7 @@ } declare <32 x i16> @test14_callee(<32 x i16>) -define void @test15(ptr %x) { +define void @test15(ptr %x) #0 { ; KNL-LABEL: test15: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rbx @@ -3539,7 +3539,7 @@ } declare void @v2i1_mem_callee(<128 x i32> %x, <2 x i1> %y) -define void @v2i1_mem(<128 x i32> %x, <2 x i1> %y) { +define void @v2i1_mem(<128 x i32> %x, <2 x i1> %y) #0 { ; KNL-LABEL: v2i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3602,7 +3602,7 @@ } declare void 
@v4i1_mem_callee(<128 x i32> %x, <4 x i1> %y) -define void @v4i1_mem(<128 x i32> %x, <4 x i1> %y) { +define void @v4i1_mem(<128 x i32> %x, <4 x i1> %y) #0 { ; KNL-LABEL: v4i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3665,7 +3665,7 @@ } declare void @v8i1_mem_callee(<128 x i32> %x, <8 x i1> %y) -define void @v8i1_mem(<128 x i32> %x, <8 x i1> %y) { +define void @v8i1_mem(<128 x i32> %x, <8 x i1> %y) #0 { ; KNL-LABEL: v8i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3728,7 +3728,7 @@ } declare void @v16i1_mem_callee(<128 x i32> %x, <16 x i1> %y) -define void @v16i1_mem(<128 x i32> %x, <16 x i1> %y) { +define void @v16i1_mem(<128 x i32> %x, <16 x i1> %y) #0 { ; KNL-LABEL: v16i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3791,7 +3791,7 @@ } declare void @v32i1_mem_callee(<128 x i32> %x, <32 x i1> %y) -define void @v32i1_mem(<128 x i32> %x, <32 x i1> %y) { +define void @v32i1_mem(<128 x i32> %x, <32 x i1> %y) #0 { ; KNL-LABEL: v32i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rbp @@ -3872,7 +3872,7 @@ } declare void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y) -define void @v64i1_mem(<128 x i32> %x, <64 x i1> %y) { +define void @v64i1_mem(<128 x i32> %x, <64 x i1> %y) #0 { ; KNL-LABEL: v64i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $472, %rsp ## imm = 0x1D8 @@ -4185,3 +4185,5 @@ call void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y) ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/avx512bw-mask-op.ll =================================================================== --- llvm/test/CodeGen/X86/avx512bw-mask-op.ll +++ llvm/test/CodeGen/X86/avx512bw-mask-op.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -define i32 @mask32(i32 %x) { +define i32 @mask32(i32 %x) #0 { ; CHECK-LABEL: mask32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -16,7 +16,7 @@ ret i32 %ret } -define i64 
@mask64(i64 %x) { +define i64 @mask64(i64 %x) #0 { ; CHECK-LABEL: mask64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -35,7 +35,7 @@ ret i64 %ret } -define void @mask32_mem(ptr %ptr) { +define void @mask32_mem(ptr %ptr) #0 { ; CHECK-LABEL: mask32_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd (%rdi), %k0 @@ -53,7 +53,7 @@ ret void } -define void @mask64_mem(ptr %ptr) { +define void @mask64_mem(ptr %ptr) #0 { ; CHECK-LABEL: mask64_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovq (%rdi), %k0 @@ -75,7 +75,7 @@ ret void } -define i32 @mand32(i32 %x, i32 %y) { +define i32 @mand32(i32 %x, i32 %y) #0 { ; CHECK-LABEL: mand32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -90,7 +90,7 @@ ret i32 %ret } -define i32 @mand32_mem(ptr %x, ptr %y) { +define i32 @mand32_mem(ptr %x, ptr %y) #0 { ; CHECK-LABEL: mand32_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd (%rdi), %k0 @@ -107,7 +107,7 @@ ret i32 %ret } -define i64 @mand64(i64 %x, i64 %y) { +define i64 @mand64(i64 %x, i64 %y) #0 { ; CHECK-LABEL: mand64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -122,7 +122,7 @@ ret i64 %ret } -define i64 @mand64_mem(ptr %x, ptr %y) { +define i64 @mand64_mem(ptr %x, ptr %y) #0 { ; CHECK-LABEL: mand64_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovq (%rdi), %k0 @@ -139,7 +139,7 @@ ret i64 %ret } -define i32 @test_v32i1_add(i32 %x, i32 %y) { +define i32 @test_v32i1_add(i32 %x, i32 %y) #0 { ; CHECK-LABEL: test_v32i1_add: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -152,7 +152,7 @@ ret i32 %ret } -define i32 @test_v32i1_sub(i32 %x, i32 %y) { +define i32 @test_v32i1_sub(i32 %x, i32 %y) #0 { ; CHECK-LABEL: test_v32i1_sub: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -165,7 +165,7 @@ ret i32 %ret } -define i32 @test_v32i1_mul(i32 %x, i32 %y) { +define i32 @test_v32i1_mul(i32 %x, i32 %y) #0 { ; CHECK-LABEL: test_v32i1_mul: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -178,7 +178,7 @@ ret i32 %ret } -define i64 @test_v64i1_add(i64 %x, i64 %y) { +define i64 
@test_v64i1_add(i64 %x, i64 %y) #0 { ; CHECK-LABEL: test_v64i1_add: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -191,7 +191,7 @@ ret i64 %ret } -define i64 @test_v64i1_sub(i64 %x, i64 %y) { +define i64 @test_v64i1_sub(i64 %x, i64 %y) #0 { ; CHECK-LABEL: test_v64i1_sub: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -204,7 +204,7 @@ ret i64 %ret } -define i64 @test_v64i1_mul(i64 %x, i64 %y) { +define i64 @test_v64i1_mul(i64 %x, i64 %y) #0 { ; CHECK-LABEL: test_v64i1_mul: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -217,7 +217,7 @@ ret i64 %ret } -define <32 x i1> @bitcast_f32_to_v32i1(float %x) { +define <32 x i1> @bitcast_f32_to_v32i1(float %x) #0 { ; CHECK-LABEL: bitcast_f32_to_v32i1: ; CHECK: ## %bb.0: ; CHECK-NEXT: vmovd %xmm0, %eax @@ -228,7 +228,7 @@ ret <32 x i1> %a } -define <64 x i1> @bitcast_f64_to_v64i1(double %x) { +define <64 x i1> @bitcast_f64_to_v64i1(double %x) #0 { ; CHECK-LABEL: bitcast_f64_to_v64i1: ; CHECK: ## %bb.0: ; CHECK-NEXT: vmovq %xmm0, %rax @@ -239,7 +239,7 @@ ret <64 x i1> %a } -define float @bitcast_v32i1_to_f32(<32 x i1> %x) { +define float @bitcast_v32i1_to_f32(<32 x i1> %x) #0 { ; CHECK-LABEL: bitcast_v32i1_to_f32: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 @@ -251,7 +251,7 @@ ret float %a } -define double @bitcast_v64i1_to_f64(<64 x i1> %x) { +define double @bitcast_v64i1_to_f64(<64 x i1> %x) #0 { ; CHECK-LABEL: bitcast_v64i1_to_f64: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0 @@ -264,3 +264,5 @@ ret double %a } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/avx512fp16-subv-broadcast-fp16.ll =================================================================== --- llvm/test/CodeGen/X86/avx512fp16-subv-broadcast-fp16.ll +++ llvm/test/CodeGen/X86/avx512fp16-subv-broadcast-fp16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512fp16 | 
FileCheck %s -define dso_local void @test_v8f16_v32f16(ptr %x_addr, ptr %y_addr) { +define dso_local void @test_v8f16_v32f16(ptr %x_addr, ptr %y_addr) #0 { ; CHECK-LABEL: test_v8f16_v32f16: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] @@ -15,7 +15,7 @@ ret void } -define dso_local void @test_v8f16_v16f16(ptr %x_addr, ptr %y_addr) { +define dso_local void @test_v8f16_v16f16(ptr %x_addr, ptr %y_addr) #0 { ; CHECK-LABEL: test_v8f16_v16f16: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] @@ -29,7 +29,7 @@ ret void } -define dso_local void @test_v16f16_v32f16(ptr %x_addr, ptr %y_addr) { +define dso_local void @test_v16f16_v32f16(ptr %x_addr, ptr %y_addr) #0 { ; CHECK-LABEL: test_v16f16_v32f16: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3] @@ -42,3 +42,5 @@ store <32 x half> %shuffle.i58, ptr %y_addr, align 64 ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/perm.avx512-false-deps.ll =================================================================== --- llvm/test/CodeGen/X86/perm.avx512-false-deps.ll +++ llvm/test/CodeGen/X86/perm.avx512-false-deps.ll @@ -2,7 +2,7 @@ ; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=+false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE ; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=-false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE -define <4 x i64> @permq_ri_256(<4 x i64> %a0) { +define <4 x i64> @permq_ri_256(<4 x i64> %a0) #0 { ; ENABLE-LABEL: permq_ri_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -27,7 +27,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_rr_256(<4 x i64> %a0, <4 x i64> %idx) { +define <4 x i64> @permq_rr_256(<4 x i64> %a0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: 
vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -59,7 +59,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_rm_256(ptr %p0, <4 x i64> %idx) { +define <4 x i64> @permq_rm_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -85,7 +85,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_mi_256(ptr %p0) { +define <4 x i64> @permq_mi_256(ptr %p0) #0 { ; ENABLE-LABEL: permq_mi_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -108,7 +108,7 @@ ret <4 x i64> %2 } -define <4 x i64> @permq_broadcast_256(ptr %p0, <4 x i64> %idx) { +define <4 x i64> @permq_broadcast_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permq_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -140,7 +140,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_maskz_256(<4 x i64> %a0, <4 x i64> %idx, ptr %mask) { +define <4 x i64> @permq_maskz_256(<4 x i64> %a0, <4 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permq_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -174,7 +174,7 @@ declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>) declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) -define <8 x i64> @permq_rr_512(<8 x i64> %a0, <8 x i64> %idx) { +define <8 x i64> @permq_rr_512(<8 x i64> %a0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -206,7 +206,7 @@ ret <8 x i64> %res } -define <8 x i64> @permq_rm_512(ptr %p0, <8 x i64> %idx) { +define <8 x i64> @permq_rm_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -232,7 +232,7 @@ ret <8 x i64> %res } -define <8 x i64> @permq_broadcast_512(ptr %p0, <8 x i64> %idx) { +define <8 x i64> @permq_broadcast_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permq_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -264,7 +264,7 @@ ret <8 x i64> %res } -define <8 x i64> @permq_maskz_512(<8 x i64> %a0, <8 x i64> %idx, ptr %mask) { +define <8 x i64> @permq_maskz_512(<8 x i64> %a0, <8 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permq_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -298,7 +298,7 @@ declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) -define <8 x i32> @permd_rr_256(<8 x i32> %a0, <8 x i32> %idx) { +define <8 x i32> @permd_rr_256(<8 x i32> %a0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -330,7 +330,7 @@ ret <8 x i32> %res } -define <8 x i32> @permd_rm_256(ptr %p0, <8 x i32> %idx) { +define <8 x i32> @permd_rm_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -356,7 +356,7 @@ ret <8 x i32> %res } -define <8 x i32> @permd_broadcast_256(ptr %p0, <8 x i32> %idx) { +define <8 x i32> @permd_broadcast_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permd_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -388,7 +388,7 @@ ret <8 x i32> %res } -define <8 x i32> @permd_maskz_256(<8 x i32> %a0, <8 x i32> %idx, ptr %mask) { +define <8 x i32> @permd_maskz_256(<8 x i32> %a0, <8 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permd_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -421,7 +421,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) -define <16 x i32> @permd_rr_512(<16 x i32> %a0, <16 x i32> %idx) { +define <16 x i32> @permd_rr_512(<16 x i32> %a0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -453,7 +453,7 @@ ret <16 x i32> %res } -define <16 x i32> 
@permd_rm_512(ptr %p0, <16 x i32> %idx) { +define <16 x i32> @permd_rm_512(ptr %p0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -479,7 +479,7 @@ ret <16 x i32> %res } -define <16 x i32> @permd_broadcast_512(ptr %p0, <16 x i32> %idx) { +define <16 x i32> @permd_broadcast_512(ptr %p0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permd_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -511,7 +511,7 @@ ret <16 x i32> %res } -define <16 x i32> @permd_maskz_512(<16 x i32> %a0, <16 x i32> %idx, ptr %mask) { +define <16 x i32> @permd_maskz_512(<16 x i32> %a0, <16 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permd_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -544,7 +544,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) -define <4 x double> @permpd_ri_256(<4 x double> %a0) { +define <4 x double> @permpd_ri_256(<4 x double> %a0) #0 { ; ENABLE-LABEL: permpd_ri_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -569,7 +569,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_rr_256(<4 x double> %a0, <4 x i64> %idx) { +define <4 x double> @permpd_rr_256(<4 x double> %a0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -606,7 +606,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_rm_256(ptr %p0, <4 x i64> %idx) { +define <4 x double> @permpd_rm_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -639,7 +639,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_mi_256(ptr %p0) { +define <4 x double> @permpd_mi_256(ptr %p0) #0 { ; ENABLE-LABEL: permpd_mi_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -662,7 +662,7 @@ ret <4 x double> %2 } -define <4 x double> @permpd_broadcast_256(ptr %p0, <4 x i64> %idx) { 
+define <4 x double> @permpd_broadcast_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -697,7 +697,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_maskz_256(<4 x double> %a0, <4 x i64> %idx, ptr %mask) { +define <4 x double> @permpd_maskz_256(<4 x double> %a0, <4 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permpd_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -734,7 +734,7 @@ declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>) declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) -define <8 x double> @permpd_rr_512(<8 x double> %a0, <8 x i64> %idx) { +define <8 x double> @permpd_rr_512(<8 x double> %a0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -771,7 +771,7 @@ ret <8 x double> %res } -define <8 x double> @permpd_rm_512(ptr %p0, <8 x i64> %idx) { +define <8 x double> @permpd_rm_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -804,7 +804,7 @@ ret <8 x double> %res } -define <8 x double> @permpd_broadcast_512(ptr %p0, <8 x i64> %idx) { +define <8 x double> @permpd_broadcast_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -839,7 +839,7 @@ ret <8 x double> %res } -define <8 x double> @permpd_maskz_512(<8 x double> %a0, <8 x i64> %idx, ptr %mask) { +define <8 x double> @permpd_maskz_512(<8 x double> %a0, <8 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permpd_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -877,7 +877,7 @@ declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) -define <8 x 
float> @permps_rr_256(<8 x float> %a0, <8 x i32> %idx) { +define <8 x float> @permps_rr_256(<8 x float> %a0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -914,7 +914,7 @@ ret <8 x float> %res } -define <8 x float> @permps_rm_256(ptr %p0, <8 x i32> %idx) { +define <8 x float> @permps_rm_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -947,7 +947,7 @@ ret <8 x float> %res } -define <8 x float> @permps_broadcast_256(ptr %p0, <8 x i32> %idx) { +define <8 x float> @permps_broadcast_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permps_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -982,7 +982,7 @@ ret <8 x float> %res } -define <8 x float> @permps_maskz_256(<8 x float> %a0, <8 x i32> %idx, ptr %mask) { +define <8 x float> @permps_maskz_256(<8 x float> %a0, <8 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permps_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -1018,7 +1018,7 @@ declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) -define <16 x float> @permps_rr_512(<16 x float> %a0, <16 x i32> %idx) { +define <16 x float> @permps_rr_512(<16 x float> %a0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -1055,7 +1055,7 @@ ret <16 x float> %res } -define <16 x float> @permps_rm_512(ptr %p0, <16 x i32> %idx) { +define <16 x float> @permps_rm_512(ptr %p0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -1088,7 +1088,7 @@ ret <16 x float> %res } -define <16 x float> @permps_broadcast_512(ptr %p0, <16 x i32> %idx) { +define <16 x float> @permps_broadcast_512(ptr %p0, 
<16 x i32> %idx) #0 { ; ENABLE-LABEL: permps_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -1123,7 +1123,7 @@ ret <16 x float> %res } -define <16 x float> @permps_maskz_512(<16 x float> %a0, <16 x i32> %idx, ptr %mask) { +define <16 x float> @permps_maskz_512(<16 x float> %a0, <16 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permps_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -1159,3 +1159,5 @@ declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/pr47299.ll =================================================================== --- llvm/test/CodeGen/X86/pr47299.ll +++ llvm/test/CodeGen/X86/pr47299.ll @@ -8,7 +8,7 @@ declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) declare <64 x i1> @llvm.get.active.lane.mask.v64i1.i32(i32, i32) -define <7 x i1> @create_mask7(i64 %0) { +define <7 x i1> @create_mask7(i64 %0) #0 { ; CHECK-LABEL: create_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: mov rax, rdi @@ -53,7 +53,7 @@ ret <7 x i1> %2 } -define <16 x i1> @create_mask16(i64 %0) { +define <16 x i1> @create_mask16(i64 %0) #0 { ; CHECK-LABEL: create_mask16: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastq zmm0, rdi @@ -67,7 +67,7 @@ ret <16 x i1> %2 } -define <32 x i1> @create_mask32(i64 %0) { +define <32 x i1> @create_mask32(i64 %0) #0 { ; CHECK-LABEL: create_mask32: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastq zmm0, rdi @@ -84,7 +84,7 @@ ret <32 x i1> %2 } -define <64 x i1> @create_mask64(i64 %0) { +define <64 x i1> @create_mask64(i64 %0) #0 { ; CHECK-LABEL: create_mask64: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastq zmm0, rdi @@ -109,7 +109,7 @@ ret <64 x i1> %2 } -define <16 x i1> @create_mask16_i32(i32 %0) { +define <16 x i1> @create_mask16_i32(i32 %0) #0 { ; CHECK-LABEL: create_mask16_i32: ; 
CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastd zmm0, edi @@ -121,7 +121,7 @@ ret <16 x i1> %2 } -define <64 x i1> @create_mask64_i32(i32 %0) { +define <64 x i1> @create_mask64_i32(i32 %0) #0 { ; CHECK-LABEL: create_mask64_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastd zmm0, edi @@ -137,3 +137,5 @@ %2 = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i32(i32 0, i32 %0) ret <64 x i1> %2 } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/pr48727.ll =================================================================== --- llvm/test/CodeGen/X86/pr48727.ll +++ llvm/test/CodeGen/X86/pr48727.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skx | FileCheck %s -define void @PR48727() { +define void @PR48727() #0 { ; CHECK-LABEL: PR48727: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vcvttpd2dqy 0, %xmm0 @@ -43,3 +43,5 @@ !1 = !{!2} !2 = !{!"buffer: {index:1, offset:0, size:20000}", !3} !3 = !{!"XLA global AA domain"} + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/vector-shuffle-avx512.ll =================================================================== --- llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL,X86,KNL32 ;expand 128 -> 256 include <4 x float> <2 x double> -define <8 x float> @expand(<4 x float> %a) { +define <8 x float> @expand(<4 x float> %a) #0 { ; SKX-LABEL: expand: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -24,7 +24,7 @@ ret <8 x float> %res } -define <8 x float> @expand1(<4 x float> %a ) { +define <8 x float> @expand1(<4 x float> %a ) #0 { ; SKX-LABEL: expand1: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -46,7 +46,7 @@ ret <8 x float> %res } ;Expand 128 -> 256 test <2 x double> -> <4 x double> -define <4 x double> 
@expand2(<2 x double> %a) { +define <4 x double> @expand2(<2 x double> %a) #0 { ; CHECK-LABEL: expand2: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -59,7 +59,7 @@ } ;expand 128 -> 256 include case <4 x i32> <8 x i32> -define <8 x i32> @expand3(<4 x i32> %a ) { +define <8 x i32> @expand3(<4 x i32> %a ) #0 { ; SKX-LABEL: expand3: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -79,7 +79,7 @@ } ;expand 128 -> 256 include case <2 x i64> <4 x i64> -define <4 x i64> @expand4(<2 x i64> %a ) { +define <4 x i64> @expand4(<2 x i64> %a ) #0 { ; SKX-LABEL: expand4: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -100,7 +100,7 @@ } ;Negative test for 128-> 256 -define <8 x float> @expand5(<4 x float> %a ) { +define <8 x float> @expand5(<4 x float> %a ) #0 { ; SKX-LABEL: expand5: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -120,7 +120,7 @@ } ;expand 256 -> 512 include <8 x float> <16 x float> -define <8 x float> @expand6(<4 x float> %a ) { +define <8 x float> @expand6(<4 x float> %a ) #0 { ; CHECK-LABEL: expand6: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -130,7 +130,7 @@ ret <8 x float> %res } -define <16 x float> @expand7(<8 x float> %a) { +define <16 x float> @expand7(<8 x float> %a) #0 { ; SKX-LABEL: expand7: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -150,7 +150,7 @@ ret <16 x float> %res } -define <16 x float> @expand8(<8 x float> %a ) { +define <16 x float> @expand8(<8 x float> %a ) #0 { ; SKX-LABEL: expand8: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -171,7 +171,7 @@ } ;expand 256 -> 512 include <4 x double> <8 x double> -define <8 x double> @expand9(<4 x double> %a) { +define <8 x double> @expand9(<4 x double> %a) #0 { ; SKX-LABEL: expand9: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -191,7 +191,7 @@ ret <8 x double> %res } -define <16 x i32> @expand10(<8 x 
i32> %a ) { +define <16 x i32> @expand10(<8 x i32> %a ) #0 { ; SKX-LABEL: expand10: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -211,7 +211,7 @@ ret <16 x i32> %res } -define <8 x i64> @expand11(<4 x i64> %a) { +define <8 x i64> @expand11(<4 x i64> %a) #0 { ; SKX-LABEL: expand11: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -232,7 +232,7 @@ } ;Negative test for 256-> 512 -define <16 x float> @expand12(<8 x float> %a) { +define <16 x float> @expand12(<8 x float> %a) #0 { ; CHECK-LABEL: expand12: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -245,7 +245,7 @@ ret <16 x float> %res } -define <16 x float> @expand13(<8 x float> %a ) { +define <16 x float> @expand13(<8 x float> %a ) #0 { ; CHECK-LABEL: expand13: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -257,7 +257,7 @@ ; The function checks for a case where the vector is mixed values vector ,and the mask points on zero elements from this vector. -define <8 x float> @expand14(<4 x float> %a) { +define <8 x float> @expand14(<4 x float> %a) #0 { ; SKX-LABEL: expand14: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -280,7 +280,7 @@ } ;Negative test. 
-define <8 x float> @expand15(<4 x float> %a) { +define <8 x float> @expand15(<4 x float> %a) #0 { ; SKX-LABEL: expand15: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -466,7 +466,7 @@ } ; PR34370 -define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) { +define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) #0 { ; SKX64-LABEL: test_masked_permps_v8f32: ; SKX64: # %bb.0: ; SKX64-NEXT: vmovaps (%rdi), %ymm2 @@ -508,7 +508,7 @@ ret <8 x float> %res } -define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) { +define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) #0 { ; X64-LABEL: test_masked_permps_v16f32: ; X64: # %bb.0: ; X64-NEXT: vmovaps (%rdi), %zmm2 @@ -531,7 +531,7 @@ ret <16 x float> %res } -define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) { +define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) #0 { ; SKX64-LABEL: test_demandedelts_pshufb_v32i8_v16i8: ; SKX64: # %bb.0: ; SKX64-NEXT: vpbroadcastd 44(%rdi), %xmm0 @@ -594,7 +594,7 @@ ret void } -define <32 x float> @PR47534(<8 x float> %tmp) { +define <32 x float> @PR47534(<8 x float> %tmp) #0 { ; CHECK-LABEL: PR47534: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -612,7 +612,7 @@ %union1= type { <16 x float> } @src1 = external dso_local local_unnamed_addr global %union1, align 64 -define void @PR43170(ptr %a0) { +define void @PR43170(ptr %a0) #0 { ; SKX64-LABEL: PR43170: ; SKX64: # %bb.0: # %entry ; SKX64-NEXT: vmovaps src1(%rip), %ymm0 @@ -646,3 +646,5 @@ store <16 x float> %1, ptr %a0, align 64 ret void } + +attributes #0 = { "min-legal-vector-width" = "512" } Index: llvm/test/CodeGen/X86/vector-trunc-usat.ll =================================================================== --- llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -19,7 +19,7 @@ ; Unsigned saturation truncation to vXi32 ; -define <2 
x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) { +define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -119,7 +119,7 @@ ret <2 x i32> %3 } -define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) { +define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i32_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -224,7 +224,7 @@ ret void } -define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) { +define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] @@ -430,7 +430,7 @@ ret <4 x i32> %3 } -define <8 x i32> @trunc_usat_v8i64_v8i32(ptr %p0) { +define <8 x i32> @trunc_usat_v8i64_v8i32(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm2 @@ -717,7 +717,7 @@ ; Unsigned saturation truncation to vXi16 ; -define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) { +define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -840,7 +840,7 @@ ret <2 x i16> %3 } -define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) { +define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i16_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -972,7 +972,7 @@ ret void } -define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) { +define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] @@ -1145,7 +1145,7 @@ 
ret <4 x i16> %3 } -define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) { +define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i16_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] @@ -1325,7 +1325,7 @@ ret void } -define <8 x i16> @trunc_usat_v8i64_v8i16(ptr %p0) { +define <8 x i16> @trunc_usat_v8i64_v8i16(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm4 @@ -1594,7 +1594,7 @@ ret <8 x i16> %3 } -define <4 x i16> @trunc_usat_v4i32_v4i16(<4 x i32> %a0) { +define <4 x i16> @trunc_usat_v4i32_v4i16(<4 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -1676,7 +1676,7 @@ ret <4 x i16> %3 } -define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, ptr%p1) { +define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i16_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -1764,7 +1764,7 @@ ret void } -define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) { +define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v8i32_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] @@ -1878,7 +1878,7 @@ ret <8 x i16> %3 } -define <16 x i16> @trunc_usat_v16i32_v16i16(ptr %p0) { +define <16 x i16> @trunc_usat_v16i32_v16i16(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v16i32_v16i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm5 @@ -2030,7 +2030,7 @@ ; Unsigned saturation truncation to vXi8 ; -define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) { +define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -2132,7 +2132,7 @@ ret 
<2 x i8> %3 } -define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, ptr %p1) { +define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -2242,7 +2242,7 @@ ret void } -define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) { +define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255] @@ -2419,7 +2419,7 @@ ret <4 x i8> %3 } -define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { +define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255] @@ -2603,7 +2603,7 @@ ret void } -define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) { +define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -2861,7 +2861,7 @@ ret <8 x i8> %3 } -define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) { +define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -3124,7 +3124,7 @@ ret void } -define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) { +define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v16i64_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa 96(%rdi), %xmm1 @@ -3587,7 +3587,7 @@ ret <16 x i8> %3 } -define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) { +define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -3670,7 +3670,7 @@ ret <4 x i8> %3 } -define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) { +define void @trunc_usat_v4i32_v4i8_store(<4 x i32> 
%a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -3769,7 +3769,7 @@ ret void } -define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) { +define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v8i32_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255] @@ -3879,7 +3879,7 @@ ret <8 x i8> %3 } -define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, ptr%p1) { +define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v8i32_v8i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255] @@ -3997,7 +3997,7 @@ ret void } -define <16 x i8> @trunc_usat_v16i32_v16i8(ptr %p0) { +define <16 x i8> @trunc_usat_v16i32_v16i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v16i32_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -4137,7 +4137,7 @@ ret <16 x i8> %3 } -define void @trunc_usat_v16i32_v16i8_store(ptr %p0, ptr %p1) { +define void @trunc_usat_v16i32_v16i8_store(ptr %p0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v16i32_v16i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -4283,7 +4283,7 @@ ret void } -define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) { +define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) #0 { ; SSE2-LABEL: trunc_usat_v8i16_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -4347,7 +4347,7 @@ ret <8 x i8> %3 } -define void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, ptr%p1) { +define void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v8i16_v8i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -4418,7 +4418,7 @@ ret void } -define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) { +define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) #0 { ; SSE2-LABEL: trunc_usat_v16i16_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = 
[255,255,255,255,255,255,255,255] @@ -4510,7 +4510,7 @@ ret <16 x i8> %3 } -define <32 x i8> @trunc_usat_v32i16_v32i8(ptr %p0) { +define <32 x i8> @trunc_usat_v32i16_v32i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v32i16_v32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm0 @@ -4640,7 +4640,7 @@ ret <32 x i8> %3 } -define <32 x i8> @trunc_usat_v32i32_v32i8(ptr %p0) { +define <32 x i8> @trunc_usat_v32i32_v32i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v32i32_v32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm7 @@ -4875,3 +4875,5 @@ %3 = trunc <32 x i32> %2 to <32 x i8> ret <32 x i8> %3 } + +attributes #0 = { "min-legal-vector-width" = "512" }