Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -497,7 +497,8 @@ // 4. Width of vector arguments and return types for this function. // 5. Width of vector aguments and return types for functions called by this // function. - CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); + if (LargestVectorWidth != 0) + CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); // Add vscale_range attribute if appropriate. Optional<std::pair<unsigned, unsigned>> VScaleRange = Index: clang/test/CodeGen/aarch64-neon-ldst-one.c =================================================================== --- clang/test/CodeGen/aarch64-neon-ldst-one.c +++ clang/test/CodeGen/aarch64-neon-ldst-one.c @@ -6802,4 +6802,4 @@ // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64" -// CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0" +// CHECK-NOT: "min-legal-vector-width"="0" Index: clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c =================================================================== --- clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -47,7 +47,7 @@ return vmul_n_f64(a, b); } -// CHECK-LABEL: define{{.*}} float @test_vmulxs_lane_f32(float noundef %a, <2 x float> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} float @test_vmulxs_lane_f32(float noundef %a, <2 x float> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] @@ -55,7 +55,7 @@ return vmulxs_lane_f32(a, b, 1); } -// CHECK-LABEL: define{{.*}} float @test_vmulxs_laneq_f32(float noundef %a, <4 x float> noundef %b) #1 { +// CHECK-LABEL: 
define{{.*}} float @test_vmulxs_laneq_f32(float noundef %a, <4 x float> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] @@ -63,7 +63,7 @@ return vmulxs_laneq_f32(a, b, 3); } -// CHECK-LABEL: define{{.*}} double @test_vmulxd_lane_f64(double noundef %a, <1 x double> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} double @test_vmulxd_lane_f64(double noundef %a, <1 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] @@ -71,7 +71,7 @@ return vmulxd_lane_f64(a, b, 0); } -// CHECK-LABEL: define{{.*}} double @test_vmulxd_laneq_f64(double noundef %a, <2 x double> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} double @test_vmulxd_laneq_f64(double noundef %a, <2 x double> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] @@ -79,7 +79,7 @@ return vmulxd_laneq_f64(a, b, 1); } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 // CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]]) @@ -90,7 +90,7 @@ } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_0(<1 x double> noundef %a, <2 x double> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} <1 x 
double> @test_vmulx_laneq_f64_0(<1 x double> noundef %a, <2 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) @@ -100,7 +100,7 @@ return vmulx_laneq_f64(a, b, 0); } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_1(<1 x double> noundef %a, <2 x double> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_1(<1 x double> noundef %a, <2 x double> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) @@ -204,7 +204,7 @@ return vfms_laneq_f64(a, b, v, 0); } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 @@ -215,7 +215,7 @@ return vqdmullh_lane_s16(a, b, 3); } -// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] @@ -223,7 +223,7 @@ return vqdmulls_lane_s32(a, b, 1); } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #1 { 
+// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 @@ -234,7 +234,7 @@ return vqdmullh_laneq_s16(a, b, 7); } -// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] @@ -242,7 +242,7 @@ return vqdmulls_laneq_s32(a, b, 3); } -// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 @@ -253,7 +253,7 @@ return vqdmulhh_lane_s16(a, b, 3); } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] @@ -262,7 +262,7 @@ } -// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = 
extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 @@ -274,7 +274,7 @@ } -// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] @@ -282,7 +282,7 @@ return vqdmulhs_laneq_s32(a, b, 3); } -// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 @@ -293,7 +293,7 @@ return vqrdmulhh_lane_s16(a, b, 3); } -// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #2 { // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] @@ -302,7 +302,7 @@ } -// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 
[[VGETQ_LANE]], i64 0 @@ -314,7 +314,7 @@ } -// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #1 { +// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #2 { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] @@ -406,7 +406,7 @@ return vqdmlsls_laneq_s32(a, b, c, 3); } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64_0() #0 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64_0() #2 { // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 @@ -425,7 +425,7 @@ return result; } -// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_2() #1 { +// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_2() #2 { // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> Index: clang/test/CodeGen/aarch64-poly128.c =================================================================== --- clang/test/CodeGen/aarch64-poly128.c +++ clang/test/CodeGen/aarch64-poly128.c @@ -28,8 +28,8 @@ // CHECK-LABEL: define {{[^@]+}}@test_vldrq_p128 // CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[PTR]], align 16 -// CHECK-NEXT: ret i128 [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[PTR]], align 16 +// CHECK-NEXT: ret i128 [[TMP0]] // poly128_t test_vldrq_p128(poly128_t * ptr) { return vldrq_p128(ptr); @@ -39,9 +39,9 @@ // CHECK-LABEL: define {{[^@]+}}@test_ld_st_p128 // 
CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[PTR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[PTR]], align 16 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i128, ptr [[PTR]], i64 1 -// CHECK-NEXT: store i128 [[TMP2]], ptr [[ADD_PTR]], align 16 +// CHECK-NEXT: store i128 [[TMP0]], ptr [[ADD_PTR]], align 16 // CHECK-NEXT: ret void // void test_ld_st_p128(poly128_t * ptr) { @@ -61,7 +61,7 @@ } // CHECK-LABEL: define {{[^@]+}}@test_vmull_high_p64 -// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[SHUFFLE_I5:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I5]] to i64 @@ -76,7 +76,7 @@ } // CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s8 -// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128 // CHECK-NEXT: ret i128 [[TMP0]] Index: clang/test/CodeGen/aarch64-poly64.c =================================================================== --- clang/test/CodeGen/aarch64-poly64.c +++ clang/test/CodeGen/aarch64-poly64.c @@ -538,4 +538,4 @@ // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" -// CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0" +// CHECK-NOT: "min-legal-vector-width"="0" Index: clang/test/CodeGen/regcall2.c =================================================================== --- clang/test/CodeGen/regcall2.c +++ clang/test/CodeGen/regcall2.c @@ -21,7 +21,7 @@ // FIXME: Do we need to change for Windows? 
// Win: define dso_local x86_regcallcc void @__regcall3__foo(ptr noalias sret(%struct.__sVector) align 64 %agg.result, i32 noundef %a) #0 // Win: define dso_local x86_regcallcc double @__regcall3__bar(ptr noundef %a) #0 -// Win: attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } +// Win: attributes #0 = { noinline nounwind optnone "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } // Lin: define dso_local x86_regcallcc %struct.__sVector @__regcall3__foo(i32 noundef %a) #0 // Lin: define dso_local x86_regcallcc double @__regcall3__bar([4 x <8 x double>] %a.coerce0, [4 x <16 x float>] %a.coerce1) #0 Index: clang/test/CodeGenCXX/arm-generated-fn-attr.cpp =================================================================== --- clang/test/CodeGenCXX/arm-generated-fn-attr.cpp +++ clang/test/CodeGenCXX/arm-generated-fn-attr.cpp @@ -23,14 +23,14 @@ // CHECK: define {{.*}} @__cxx_global_var_init() [[ATTR1:#[0-9]+]] // CHECK: define {{.*}} @__clang_call_terminate({{.*}}) [[ATTR2:#[0-9]+]] -// CHECK: define {{.*}} @_ZTW4var1() [[ATTR3:#[0-9]+]] -// CHECK: define {{.*}} @_ZTW4var2() [[ATTR3]] +// CHECK: define {{.*}} @_ZTW4var1() [[ATTR1]] +// CHECK: define {{.*}} @_ZTW4var2() [[ATTR1]] // CHECK: define {{.*}} @__tls_init() [[ATTR1]] -// CHECK-PACBTI: attributes [[ATTR1]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode"{{.*}} } -// CHECK-PACBTI: attributes [[ATTR2]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode"{{.*}} } -// CHECK-PACBTI: attributes [[ATTR3]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode"{{.*}} } + +// CHECK-PACBTI: 
attributes [[ATTR1]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode" } +// CHECK-PACBTI: attributes [[ATTR2]] = { {{.*}}"target-features"="+armv8.1-m.main,+pacbti,+thumb-mode" } + // CHECK-NOPACBTI: attributes [[ATTR1]] = { {{.*}}"target-features"="+armv8.1-m.main,+thumb-mode,-pacbti"{{.*}} } // CHECK-NOPACBTI: attributes [[ATTR2]] = { {{.*}}"target-features"="+armv8.1-m.main,+thumb-mode,-pacbti"{{.*}} } -// CHECK-NOPACBTI: attributes [[ATTR3]] = { {{.*}}"target-features"="+armv8.1-m.main,+thumb-mode,-pacbti"{{.*}} } Index: clang/test/CodeGenCXX/dllexport-ctor-closure-nested.cpp =================================================================== --- clang/test/CodeGenCXX/dllexport-ctor-closure-nested.cpp +++ clang/test/CodeGenCXX/dllexport-ctor-closure-nested.cpp @@ -17,4 +17,4 @@ }; // CHECK-LABEL: $"??1HasImplicitDtor1@@QAE@XZ" = comdat any -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorClosureInner@CtorClosureOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorClosureInner@CtorClosureOuter@@QAEXXZ"({{.*}}) comdat Index: clang/test/CodeGenCXX/dllexport-ctor-closure.cpp =================================================================== --- clang/test/CodeGenCXX/dllexport-ctor-closure.cpp +++ clang/test/CodeGenCXX/dllexport-ctor-closure.cpp @@ -5,7 +5,7 @@ struct CtorWithClosure { __declspec(dllexport) CtorWithClosure(...) 
{} -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosure@@QAEXXZ"({{.*}}) comdat // CHECK: %[[this_addr:.*]] = alloca ptr, align 4 // CHECK: store ptr %this, ptr %[[this_addr]], align 4 // CHECK: %[[this:.*]] = load ptr, ptr %[[this_addr]] @@ -17,7 +17,7 @@ __declspec(dllexport) CtorWithClosureOutOfLine(...); }; CtorWithClosureOutOfLine::CtorWithClosureOutOfLine(...) {} -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) comdat #define DELETE_IMPLICIT_MEMBERS(ClassName) \ ClassName(ClassName &&) = delete; \ @@ -28,7 +28,7 @@ struct __declspec(dllexport) ClassWithClosure { DELETE_IMPLICIT_MEMBERS(ClassWithClosure); ClassWithClosure(...) 
{} -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FClassWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FClassWithClosure@@QAEXXZ"({{.*}}) comdat // CHECK: %[[this_addr:.*]] = alloca ptr, align 4 // CHECK: store ptr %this, ptr %[[this_addr]], align 4 // CHECK: %[[this:.*]] = load ptr, ptr %[[this_addr]] @@ -44,10 +44,10 @@ extern template struct TemplateWithClosure; template struct __declspec(dllexport) TemplateWithClosure; -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) comdat // CHECK: call {{.*}} @"??0?$TemplateWithClosure@D@@QAE@H@Z"({{.*}}, i32 noundef 1) -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) comdat // CHECK: call {{.*}} @"??0?$TemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 noundef 4) template struct __declspec(dllexport) ExportedTemplateWithClosure { @@ -55,7 +55,7 @@ }; template <> ExportedTemplateWithClosure::ExportedTemplateWithClosure(int); // Don't try to emit the closure for a declaration. 
template <> ExportedTemplateWithClosure::ExportedTemplateWithClosure(int) {}; -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$ExportedTemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$ExportedTemplateWithClosure@H@@QAEXXZ"({{.*}}) comdat // CHECK: call {{.*}} @"??0?$ExportedTemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 noundef 4) struct __declspec(dllexport) NestedOuter { @@ -67,8 +67,8 @@ }; }; -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat -// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedOuter@@QAEXXZ"({{.*}}) comdat +// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) comdat struct HasDtor { ~HasDtor(); Index: clang/test/CodeGenCXX/dllexport.cpp =================================================================== --- clang/test/CodeGenCXX/dllexport.cpp +++ clang/test/CodeGenCXX/dllexport.cpp @@ -535,7 +535,7 @@ // MSVC2013-DAG: define weak_odr dso_local dllexport {{.+}} @"??4?$SomeTemplate@H@@Q{{.+}}0@A{{.+}}0@@Z" struct __declspec(dllexport) InheritFromTemplate : SomeTemplate {}; -// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) comdat namespace PR23801 { template @@ -552,7 +552,7 @@ } // -// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FB@PR23801@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FB@PR23801@@QAEXXZ"({{.*}}) comdat struct __declspec(dllexport) T 
{ // Copy assignment operator: Index: clang/test/OpenMP/amdgcn-attributes.cpp =================================================================== --- clang/test/OpenMP/amdgcn-attributes.cpp +++ clang/test/OpenMP/amdgcn-attributes.cpp @@ -32,12 +32,12 @@ return x + 1; } -// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" } -// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "kernel" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } - -// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } -// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "min-legal-vector-width"="0" "no-nans-fp-math"="true" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" } +// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } + +// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } +// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" } Index: clang/test/OpenMP/irbuilder_safelen.cpp =================================================================== --- clang/test/OpenMP/irbuilder_safelen.cpp +++ clang/test/OpenMP/irbuilder_safelen.cpp @@ -123,8 +123,8 @@ } } //. -// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} Index: clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp =================================================================== --- clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp +++ clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp @@ -123,8 +123,8 @@ } } //. 
-// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 50} Index: clang/test/OpenMP/irbuilder_simd_aligned.cpp =================================================================== --- clang/test/OpenMP/irbuilder_simd_aligned.cpp +++ clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -162,8 +162,8 @@ } } //. -// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } // CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } //. 
// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} Index: clang/test/OpenMP/irbuilder_simdlen.cpp =================================================================== --- clang/test/OpenMP/irbuilder_simdlen.cpp +++ clang/test/OpenMP/irbuilder_simdlen.cpp @@ -123,8 +123,8 @@ } } //. -// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} Index: clang/test/OpenMP/irbuilder_simdlen_safelen.cpp =================================================================== --- clang/test/OpenMP/irbuilder_simdlen_safelen.cpp +++ clang/test/OpenMP/irbuilder_simdlen_safelen.cpp @@ -123,8 +123,8 @@ } } //. 
-// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} Index: llvm/lib/Target/X86/X86TargetMachine.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetMachine.cpp +++ llvm/lib/Target/X86/X86TargetMachine.cpp @@ -284,8 +284,12 @@ } // Extract min-legal-vector-width attribute. - unsigned RequiredVectorWidth = UINT32_MAX; + unsigned RequiredVectorWidth = 0; Attribute MinLegalVecWidthAttr = F.getFnAttribute("min-legal-vector-width"); + + // FIXME: The point the subtarget is constructed is not well defined. The + // attribute propagation passes may modify the attribute later, so you may get + // a different subtarget at different points in the pipeline. if (MinLegalVecWidthAttr.isValid()) { StringRef Val = MinLegalVecWidthAttr.getValueAsString(); unsigned Width; @@ -294,6 +298,37 @@ Key += Val; RequiredVectorWidth = Width; } + } else { + // FIXME: This reduction over vector return/argument vector size is + // effectively repeated in at least 5 places. If there should be an + // interaction between the argument types and the explicit + // min-legal-vector-width, it should be consistently applied in one location + // which is not the x86 subtarget constructor. 
A more reasonable approach + would be to have attribute inference account for this, and treat an + unannotated function consistently as 0 (but this requires test updates) + (also if inference needs to account for explicit user attributes and the + IR types, a separate x86 prefixed attribute would be better). + unsigned LargestVectorWidth = 0; + for (const Argument &A : F.args()) { + if (auto *VT = dyn_cast<VectorType>(A.getType())) { + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); + } + } + + // Update vector width based on return type. + if (auto *VT = dyn_cast<VectorType>(F.getReturnType())) { + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); + } + + if (LargestVectorWidth != 0) { + RequiredVectorWidth = LargestVectorWidth; + Key += 'm'; + Key += llvm::utostr(RequiredVectorWidth); + } } // Add CPU to the Key. Index: llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll +++ llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll @@ -18,7 +18,7 @@ ; } ; (relates to the testcase in PR50566) -define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test1' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -75,7 +75,7 @@ ; y[i] = points[i*4 + 1]; ; } -define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias
nocapture readonly %y) { +define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test2' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -142,7 +142,7 @@ ; x[i] = points[i*3]; ; } -define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { +define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 @@ -192,3 +192,5 @@ for.end: ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll +++ llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll @@ -18,7 +18,7 @@ ; } ; (relates to the testcase in PR50566) -define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test1' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: 
store i16 %2, i16* %arrayidx7, align 2 @@ -75,7 +75,7 @@ ; points[i*4 + 1] = y[i]; ; } -define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test2' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 @@ -142,7 +142,7 @@ ; points[i*3] = x[i]; ; } -define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { +define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) #0 { ; DISABLED_MASKED_STRIDED-LABEL: 'test' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 @@ -181,3 +181,5 @@ for.end: ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -8,7 +8,7 @@ ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX -define i32 @masked_load() { +define i32 @masked_load() #0 { ; 
SSE2-LABEL: 'masked_load' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef) @@ -369,7 +369,7 @@ ret i32 0 } -define i32 @masked_store() { +define i32 @masked_store() #0 { ; SSE2-LABEL: 'masked_store' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef) @@ -730,7 +730,7 @@ ret i32 0 } -define i32 @masked_gather() { +define i32 @masked_gather() #0 { ; SSE2-LABEL: 'masked_gather' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) @@ -953,7 +953,7 @@ ret i32 0 } -define i32 @masked_scatter() { +define i32 @masked_scatter() #0 { ; SSE2-LABEL: 'masked_scatter' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) @@ -1122,7 +1122,7 @@ ret i32 0 } -define i32 @masked_expandload() { +define i32 
@masked_expandload() #0 { ; SSE2-LABEL: 'masked_expandload' ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) @@ -1264,7 +1264,7 @@ ret i32 0 } -define i32 @masked_compressstore() { +define i32 @masked_compressstore() #0 { ; SSE2-LABEL: 'masked_compressstore' ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) @@ -1460,7 +1460,7 @@ ret i32 0 } -define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) { +define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) #0 { ; SSE2-LABEL: 'test1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) @@ -1486,7 +1486,7 @@ ret <2 x double> %res } -define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { +define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) #0 { ; SSE2-LABEL: 'test2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) @@ 
-1512,7 +1512,7 @@ ret <4 x i32> %res } -define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) { +define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) #0 { ; SSE2-LABEL: 'test3' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) @@ -1538,7 +1538,7 @@ ret void } -define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { +define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) #0 { ; SSE2-LABEL: 'test4' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) @@ -1574,7 +1574,7 @@ ret <8 x float> %res } -define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { +define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) #0 { ; SSE2-LABEL: 'test5' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) @@ -1600,7 +1600,7 @@ ret void } -define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { +define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) #0 { ; SSE2-LABEL: 'test6' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void 
@llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) @@ -1626,7 +1626,7 @@ ret void } -define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { +define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) #0 { ; SSE2-LABEL: 'test7' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) @@ -1652,7 +1652,7 @@ ret <2 x float> %res } -define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { +define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) #0 { ; SSE2-LABEL: 'test8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) @@ -1773,7 +1773,7 @@ ret <4 x i32> %res } -define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1817,7 +1817,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1861,7 +1861,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind @@ -1905,7 +1905,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer @@ -1964,7 +1964,7 @@ ret <16 x float>%res } -define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) #0 { ; SSE2-LABEL: 'test_scatter_16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> 
zeroinitializer @@ -2022,7 +2022,7 @@ ret void } -define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { +define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2043,7 +2043,7 @@ ret void } -define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { +define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2068,7 +2068,7 @@ ret void } -define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) { +define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2118,7 +2118,7 @@ ret <4 x float>%res } -define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { +define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2411,3 +2411,5 @@ declare 
void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) declare void @llvm.masked.compressstore.v8i8(<8 x i8>, i8*, <8 x i1>) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -8,7 +8,7 @@ ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX -define i32 @masked_load() { +define i32 @masked_load() #0 { ; SSE2-LABEL: 'masked_load' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef) @@ -369,7 +369,7 @@ ret i32 0 } -define i32 @masked_store() { +define i32 @masked_store() #0 { ; SSE2-LABEL: 'masked_store' ; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef) @@ -730,7 +730,7 @@ ret i32 0 } -define i32 @masked_gather() { +define i32 @masked_gather() #0 { ; SSE2-LABEL: 'masked_gather' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> 
@llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) @@ -953,7 +953,7 @@ ret i32 0 } -define i32 @masked_scatter() { +define i32 @masked_scatter() #0 { ; SSE2-LABEL: 'masked_scatter' ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) @@ -1122,7 +1122,7 @@ ret i32 0 } -define i32 @masked_expandload() { +define i32 @masked_expandload() #0 { ; SSE2-LABEL: 'masked_expandload' ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) @@ -1264,7 +1264,7 @@ ret i32 0 } -define i32 @masked_compressstore() { +define i32 @masked_compressstore() #0 { ; SSE2-LABEL: 'masked_compressstore' ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) @@ -1460,7 +1460,7 @@ ret i32 0 } -define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) { +define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) #0 { ; SSE2-LABEL: 
'test1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) @@ -1486,7 +1486,7 @@ ret <2 x double> %res } -define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { +define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) #0 { ; SSE2-LABEL: 'test2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) @@ -1512,7 +1512,7 @@ ret <4 x i32> %res } -define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) { +define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) #0 { ; SSE2-LABEL: 'test3' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) @@ -1538,7 +1538,7 @@ ret void } -define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { +define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) #0 { ; SSE2-LABEL: 'test4' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) @@ -1574,7 +1574,7 @@ ret <8 x float> %res } -define void @test5(<2 x i32> 
%trigger, <2 x float>* %addr, <2 x float> %val) { +define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) #0 { ; SSE2-LABEL: 'test5' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) @@ -1600,7 +1600,7 @@ ret void } -define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { +define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) #0 { ; SSE2-LABEL: 'test6' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) @@ -1626,7 +1626,7 @@ ret void } -define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { +define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) #0 { ; SSE2-LABEL: 'test7' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) @@ -1652,7 +1652,7 @@ ret <2 x float> %res } -define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { +define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) #0 { ; SSE2-LABEL: 'test8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, 
i32 4, <2 x i1> %mask, <2 x i32> %dst) @@ -1773,7 +1773,7 @@ ret <4 x i32> %res } -define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1817,7 +1817,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind @@ -1861,7 +1861,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind @@ -1905,7 +1905,7 @@ ret <16 x float>%res } -define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) { +define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_16f32_const_mask2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer @@ -1964,7 +1964,7 @@ ret <16 x float>%res } -define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) #0 { ; SSE2-LABEL: 'test_scatter_16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer @@ -2022,7 +2022,7 @@ ret void } -define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { +define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2043,7 +2043,7 @@ ret void } -define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { +define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_scatter_4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -2068,7 +2068,7 @@ ret void } -define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) { +define <4 x float> 
@test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) #0 { ; SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2118,7 +2118,7 @@ ret <4 x float>%res } -define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { +define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) #0 { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind @@ -2411,3 +2411,5 @@ declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) declare void @llvm.masked.compressstore.v8i8(<8 x i8>, i8*, <8 x i1>) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/Analysis/CostModel/X86/powi.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/powi.ll +++ llvm/test/Analysis/CostModel/X86/powi.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -mtriple=x86_64-linux-gnu -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -mtriple=x86_64-linux-gnu -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 -define i32 @powi_var(i32 %arg) { +define i32 @powi_var(i32 %arg) #0 { ; SSE-LABEL: 'powi_var' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 %arg) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x 
float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 %arg) @@ -72,7 +72,7 @@ ret i32 poison } -define i32 @powi_3() { +define i32 @powi_3() #0 { ; SSE-LABEL: 'powi_3' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) @@ -140,7 +140,7 @@ ret i32 poison } -define i32 @powi_n3() { +define i32 @powi_n3() #0 { ; SSE-LABEL: 'powi_n3' ; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) ; SSE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) @@ -208,7 +208,7 @@ ret i32 poison } -define i32 @powi_6() { +define i32 @powi_6() #0 { ; SSE-LABEL: 'powi_6' ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 6) @@ -276,7 +276,7 @@ ret i32 poison } -define i32 @powi_16() { +define i32 @powi_16() #0 { ; SSE-LABEL: 'powi_16' ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) @@ -355,3 +355,5 @@ declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32) declare <8 x double> @llvm.powi.v8f64(<8 x double>, i32) declare <16 x double> @llvm.powi.v16f64(<16 x double>, i32) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- 
llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx -fast-isel | FileCheck %s --check-prefix=FASTISEL -define <16 x i1> @test1() { +define <16 x i1> @test1() #0 { ; ALL_X64-LABEL: test1: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 @@ -22,7 +22,7 @@ ret <16 x i1> zeroinitializer } -define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) { +define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) #0 { ; ALL_X64-LABEL: test2: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -46,7 +46,7 @@ ret <16 x i1> %c } -define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) { +define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) #0 { ; ALL_X64-LABEL: test3: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -70,7 +70,7 @@ ret <8 x i1> %c } -define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) { +define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) #0 { ; ALL_X64-LABEL: test4: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -96,7 +96,7 @@ declare <8 x i1> @func8xi1(<8 x i1> %a) -define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) { +define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) #0 { ; KNL-LABEL: test5: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rax @@ -160,7 +160,7 @@ declare <16 x i1> @func16xi1(<16 x i1> %a) -define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) { +define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) #0 { ; KNL-LABEL: test6: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rax @@ -224,7 +224,7 @@ declare <4 x i1> @func4xi1(<4 x i1> %a) -define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) { +define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) #0 { ; ALL_X64-LABEL: test7: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: pushq %rax @@ -265,7 +265,7 @@ ret <4 x i32> %res } -define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) { +define <8 x i1> @test7a(<8 x i32>%a, <8 
x i32>%b) #0 { ; KNL-LABEL: test7a: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rax @@ -319,7 +319,7 @@ ret <8 x i1> %res } -define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) { +define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) #0 { ; ALL_X64-LABEL: test8: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: testb $1, %dil @@ -350,7 +350,7 @@ ret <16 x i8> %res } -define i1 @test9(double %a, double %b) { +define i1 @test9(double %a, double %b) #0 { ; ALL_X64-LABEL: test9: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: vucomisd %xmm0, %xmm1 @@ -373,7 +373,7 @@ ret i1 %c } -define i32 @test10(i32 %a, i32 %b, i1 %cond) { +define i32 @test10(i32 %a, i32 %b, i1 %cond) #0 { ; ALL_X64-LABEL: test10: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: movl %edi, %eax @@ -400,7 +400,7 @@ ret i32 %c } -define i1 @test11(i32 %a, i32 %b) { +define i1 @test11(i32 %a, i32 %b) #0 { ; ALL_X64-LABEL: test11: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: cmpl %esi, %edi @@ -423,7 +423,7 @@ ret i1 %c } -define i32 @test12(i32 %a1, i32 %a2, i32 %b1) { +define i32 @test12(i32 %a1, i32 %a2, i32 %b1) #0 { ; ALL_X64-LABEL: test12: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: pushq %rbp @@ -519,7 +519,7 @@ ret i32 %res1 } -define <1 x i1> @test13(ptr %foo) { +define <1 x i1> @test13(ptr %foo) #0 { ; KNL-LABEL: test13: ; KNL: ## %bb.0: ; KNL-NEXT: movzbl (%rdi), %eax @@ -550,7 +550,7 @@ ret <1 x i1> %bar } -define void @test14(ptr %x) { +define void @test14(ptr %x) #0 { ; KNL-LABEL: test14: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rbx @@ -610,7 +610,7 @@ } declare <32 x i16> @test14_callee(<32 x i16>) -define void @test15(ptr %x) { +define void @test15(ptr %x) #0 { ; KNL-LABEL: test15: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rbx @@ -3539,7 +3539,7 @@ } declare void @v2i1_mem_callee(<128 x i32> %x, <2 x i1> %y) -define void @v2i1_mem(<128 x i32> %x, <2 x i1> %y) { +define void @v2i1_mem(<128 x i32> %x, <2 x i1> %y) #0 { ; KNL-LABEL: v2i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3602,7 +3602,7 @@ } declare void 
@v4i1_mem_callee(<128 x i32> %x, <4 x i1> %y) -define void @v4i1_mem(<128 x i32> %x, <4 x i1> %y) { +define void @v4i1_mem(<128 x i32> %x, <4 x i1> %y) #0 { ; KNL-LABEL: v4i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3665,7 +3665,7 @@ } declare void @v8i1_mem_callee(<128 x i32> %x, <8 x i1> %y) -define void @v8i1_mem(<128 x i32> %x, <8 x i1> %y) { +define void @v8i1_mem(<128 x i32> %x, <8 x i1> %y) #0 { ; KNL-LABEL: v8i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3728,7 +3728,7 @@ } declare void @v16i1_mem_callee(<128 x i32> %x, <16 x i1> %y) -define void @v16i1_mem(<128 x i32> %x, <16 x i1> %y) { +define void @v16i1_mem(<128 x i32> %x, <16 x i1> %y) #0 { ; KNL-LABEL: v16i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $24, %rsp @@ -3791,7 +3791,7 @@ } declare void @v32i1_mem_callee(<128 x i32> %x, <32 x i1> %y) -define void @v32i1_mem(<128 x i32> %x, <32 x i1> %y) { +define void @v32i1_mem(<128 x i32> %x, <32 x i1> %y) #0 { ; KNL-LABEL: v32i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: pushq %rbp @@ -3872,7 +3872,7 @@ } declare void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y) -define void @v64i1_mem(<128 x i32> %x, <64 x i1> %y) { +define void @v64i1_mem(<128 x i32> %x, <64 x i1> %y) #0 { ; KNL-LABEL: v64i1_mem: ; KNL: ## %bb.0: ; KNL-NEXT: subq $472, %rsp ## imm = 0x1D8 @@ -4185,3 +4185,5 @@ call void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y) ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/avx512bw-mask-op.ll =================================================================== --- llvm/test/CodeGen/X86/avx512bw-mask-op.ll +++ llvm/test/CodeGen/X86/avx512bw-mask-op.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -define i32 @mask32(i32 %x) { +define i32 @mask32(i32 %x) #0 { ; CHECK-LABEL: mask32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -16,7 +16,7 @@ ret i32 %ret } -define i64 
@mask64(i64 %x) { +define i64 @mask64(i64 %x) #0 { ; CHECK-LABEL: mask64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -35,7 +35,7 @@ ret i64 %ret } -define void @mask32_mem(ptr %ptr) { +define void @mask32_mem(ptr %ptr) #0 { ; CHECK-LABEL: mask32_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd (%rdi), %k0 @@ -53,7 +53,7 @@ ret void } -define void @mask64_mem(ptr %ptr) { +define void @mask64_mem(ptr %ptr) #0 { ; CHECK-LABEL: mask64_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovq (%rdi), %k0 @@ -75,7 +75,7 @@ ret void } -define i32 @mand32(i32 %x, i32 %y) { +define i32 @mand32(i32 %x, i32 %y) #0 { ; CHECK-LABEL: mand32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -90,7 +90,7 @@ ret i32 %ret } -define i32 @mand32_mem(ptr %x, ptr %y) { +define i32 @mand32_mem(ptr %x, ptr %y) #0 { ; CHECK-LABEL: mand32_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd (%rdi), %k0 @@ -107,7 +107,7 @@ ret i32 %ret } -define i64 @mand64(i64 %x, i64 %y) { +define i64 @mand64(i64 %x, i64 %y) #0 { ; CHECK-LABEL: mand64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -122,7 +122,7 @@ ret i64 %ret } -define i64 @mand64_mem(ptr %x, ptr %y) { +define i64 @mand64_mem(ptr %x, ptr %y) #0 { ; CHECK-LABEL: mand64_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovq (%rdi), %k0 @@ -139,7 +139,7 @@ ret i64 %ret } -define i32 @test_v32i1_add(i32 %x, i32 %y) { +define i32 @test_v32i1_add(i32 %x, i32 %y) #0 { ; CHECK-LABEL: test_v32i1_add: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -152,7 +152,7 @@ ret i32 %ret } -define i32 @test_v32i1_sub(i32 %x, i32 %y) { +define i32 @test_v32i1_sub(i32 %x, i32 %y) #0 { ; CHECK-LABEL: test_v32i1_sub: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -165,7 +165,7 @@ ret i32 %ret } -define i32 @test_v32i1_mul(i32 %x, i32 %y) { +define i32 @test_v32i1_mul(i32 %x, i32 %y) #0 { ; CHECK-LABEL: test_v32i1_mul: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax @@ -178,7 +178,7 @@ ret i32 %ret } -define i64 @test_v64i1_add(i64 %x, i64 %y) { +define i64 
@test_v64i1_add(i64 %x, i64 %y) #0 { ; CHECK-LABEL: test_v64i1_add: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -191,7 +191,7 @@ ret i64 %ret } -define i64 @test_v64i1_sub(i64 %x, i64 %y) { +define i64 @test_v64i1_sub(i64 %x, i64 %y) #0 { ; CHECK-LABEL: test_v64i1_sub: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -204,7 +204,7 @@ ret i64 %ret } -define i64 @test_v64i1_mul(i64 %x, i64 %y) { +define i64 @test_v64i1_mul(i64 %x, i64 %y) #0 { ; CHECK-LABEL: test_v64i1_mul: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax @@ -217,7 +217,7 @@ ret i64 %ret } -define <32 x i1> @bitcast_f32_to_v32i1(float %x) { +define <32 x i1> @bitcast_f32_to_v32i1(float %x) #0 { ; CHECK-LABEL: bitcast_f32_to_v32i1: ; CHECK: ## %bb.0: ; CHECK-NEXT: vmovd %xmm0, %eax @@ -228,7 +228,7 @@ ret <32 x i1> %a } -define <64 x i1> @bitcast_f64_to_v64i1(double %x) { +define <64 x i1> @bitcast_f64_to_v64i1(double %x) #0 { ; CHECK-LABEL: bitcast_f64_to_v64i1: ; CHECK: ## %bb.0: ; CHECK-NEXT: vmovq %xmm0, %rax @@ -239,7 +239,7 @@ ret <64 x i1> %a } -define float @bitcast_v32i1_to_f32(<32 x i1> %x) { +define float @bitcast_v32i1_to_f32(<32 x i1> %x) #0 { ; CHECK-LABEL: bitcast_v32i1_to_f32: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 @@ -251,7 +251,7 @@ ret float %a } -define double @bitcast_v64i1_to_f64(<64 x i1> %x) { +define double @bitcast_v64i1_to_f64(<64 x i1> %x) #0 { ; CHECK-LABEL: bitcast_v64i1_to_f64: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0 @@ -264,3 +264,5 @@ ret double %a } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/avx512fp16-subv-broadcast-fp16.ll =================================================================== --- llvm/test/CodeGen/X86/avx512fp16-subv-broadcast-fp16.ll +++ llvm/test/CodeGen/X86/avx512fp16-subv-broadcast-fp16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512fp16 | 
FileCheck %s -define dso_local void @test_v8f16_v32f16(ptr %x_addr, ptr %y_addr) { +define dso_local void @test_v8f16_v32f16(ptr %x_addr, ptr %y_addr) #0 { ; CHECK-LABEL: test_v8f16_v32f16: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] @@ -15,7 +15,7 @@ ret void } -define dso_local void @test_v8f16_v16f16(ptr %x_addr, ptr %y_addr) { +define dso_local void @test_v8f16_v16f16(ptr %x_addr, ptr %y_addr) #0 { ; CHECK-LABEL: test_v8f16_v16f16: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] @@ -29,7 +29,7 @@ ret void } -define dso_local void @test_v16f16_v32f16(ptr %x_addr, ptr %y_addr) { +define dso_local void @test_v16f16_v32f16(ptr %x_addr, ptr %y_addr) #0 { ; CHECK-LABEL: test_v16f16_v32f16: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3] @@ -42,3 +42,5 @@ store <32 x half> %shuffle.i58, ptr %y_addr, align 64 ret void } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/perm.avx512-false-deps.ll =================================================================== --- llvm/test/CodeGen/X86/perm.avx512-false-deps.ll +++ llvm/test/CodeGen/X86/perm.avx512-false-deps.ll @@ -2,7 +2,7 @@ ; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=+false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE ; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=-false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE -define <4 x i64> @permq_ri_256(<4 x i64> %a0) { +define <4 x i64> @permq_ri_256(<4 x i64> %a0) #0 { ; ENABLE-LABEL: permq_ri_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -27,7 +27,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_rr_256(<4 x i64> %a0, <4 x i64> %idx) { +define <4 x i64> @permq_rr_256(<4 x i64> %a0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: 
vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -59,7 +59,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_rm_256(ptr %p0, <4 x i64> %idx) { +define <4 x i64> @permq_rm_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -85,7 +85,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_mi_256(ptr %p0) { +define <4 x i64> @permq_mi_256(ptr %p0) #0 { ; ENABLE-LABEL: permq_mi_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -108,7 +108,7 @@ ret <4 x i64> %2 } -define <4 x i64> @permq_broadcast_256(ptr %p0, <4 x i64> %idx) { +define <4 x i64> @permq_broadcast_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permq_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -140,7 +140,7 @@ ret <4 x i64> %res } -define <4 x i64> @permq_maskz_256(<4 x i64> %a0, <4 x i64> %idx, ptr %mask) { +define <4 x i64> @permq_maskz_256(<4 x i64> %a0, <4 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permq_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -174,7 +174,7 @@ declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>) declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) -define <8 x i64> @permq_rr_512(<8 x i64> %a0, <8 x i64> %idx) { +define <8 x i64> @permq_rr_512(<8 x i64> %a0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -206,7 +206,7 @@ ret <8 x i64> %res } -define <8 x i64> @permq_rm_512(ptr %p0, <8 x i64> %idx) { +define <8 x i64> @permq_rm_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permq_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -232,7 +232,7 @@ ret <8 x i64> %res } -define <8 x i64> @permq_broadcast_512(ptr %p0, <8 x i64> %idx) { +define <8 x i64> @permq_broadcast_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permq_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -264,7 +264,7 @@ ret <8 x i64> %res } -define <8 x i64> @permq_maskz_512(<8 x i64> %a0, <8 x i64> %idx, ptr %mask) { +define <8 x i64> @permq_maskz_512(<8 x i64> %a0, <8 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permq_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -298,7 +298,7 @@ declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) -define <8 x i32> @permd_rr_256(<8 x i32> %a0, <8 x i32> %idx) { +define <8 x i32> @permd_rr_256(<8 x i32> %a0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -330,7 +330,7 @@ ret <8 x i32> %res } -define <8 x i32> @permd_rm_256(ptr %p0, <8 x i32> %idx) { +define <8 x i32> @permd_rm_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -356,7 +356,7 @@ ret <8 x i32> %res } -define <8 x i32> @permd_broadcast_256(ptr %p0, <8 x i32> %idx) { +define <8 x i32> @permd_broadcast_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permd_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -388,7 +388,7 @@ ret <8 x i32> %res } -define <8 x i32> @permd_maskz_256(<8 x i32> %a0, <8 x i32> %idx, ptr %mask) { +define <8 x i32> @permd_maskz_256(<8 x i32> %a0, <8 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permd_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -421,7 +421,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) -define <16 x i32> @permd_rr_512(<16 x i32> %a0, <16 x i32> %idx) { +define <16 x i32> @permd_rr_512(<16 x i32> %a0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -453,7 +453,7 @@ ret <16 x i32> %res } -define <16 x i32> 
@permd_rm_512(ptr %p0, <16 x i32> %idx) { +define <16 x i32> @permd_rm_512(ptr %p0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permd_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -479,7 +479,7 @@ ret <16 x i32> %res } -define <16 x i32> @permd_broadcast_512(ptr %p0, <16 x i32> %idx) { +define <16 x i32> @permd_broadcast_512(ptr %p0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permd_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -511,7 +511,7 @@ ret <16 x i32> %res } -define <16 x i32> @permd_maskz_512(<16 x i32> %a0, <16 x i32> %idx, ptr %mask) { +define <16 x i32> @permd_maskz_512(<16 x i32> %a0, <16 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permd_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -544,7 +544,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) -define <4 x double> @permpd_ri_256(<4 x double> %a0) { +define <4 x double> @permpd_ri_256(<4 x double> %a0) #0 { ; ENABLE-LABEL: permpd_ri_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -569,7 +569,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_rr_256(<4 x double> %a0, <4 x i64> %idx) { +define <4 x double> @permpd_rr_256(<4 x double> %a0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -606,7 +606,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_rm_256(ptr %p0, <4 x i64> %idx) { +define <4 x double> @permpd_rm_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -639,7 +639,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_mi_256(ptr %p0) { +define <4 x double> @permpd_mi_256(ptr %p0) #0 { ; ENABLE-LABEL: permpd_mi_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -662,7 +662,7 @@ ret <4 x double> %2 } -define <4 x double> @permpd_broadcast_256(ptr %p0, <4 x i64> %idx) { 
+define <4 x double> @permpd_broadcast_256(ptr %p0, <4 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -697,7 +697,7 @@ ret <4 x double> %res } -define <4 x double> @permpd_maskz_256(<4 x double> %a0, <4 x i64> %idx, ptr %mask) { +define <4 x double> @permpd_maskz_256(<4 x double> %a0, <4 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permpd_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -734,7 +734,7 @@ declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>) declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) -define <8 x double> @permpd_rr_512(<8 x double> %a0, <8 x i64> %idx) { +define <8 x double> @permpd_rr_512(<8 x double> %a0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -771,7 +771,7 @@ ret <8 x double> %res } -define <8 x double> @permpd_rm_512(ptr %p0, <8 x i64> %idx) { +define <8 x double> @permpd_rm_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -804,7 +804,7 @@ ret <8 x double> %res } -define <8 x double> @permpd_broadcast_512(ptr %p0, <8 x i64> %idx) { +define <8 x double> @permpd_broadcast_512(ptr %p0, <8 x i64> %idx) #0 { ; ENABLE-LABEL: permpd_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -839,7 +839,7 @@ ret <8 x double> %res } -define <8 x double> @permpd_maskz_512(<8 x double> %a0, <8 x i64> %idx, ptr %mask) { +define <8 x double> @permpd_maskz_512(<8 x double> %a0, <8 x i64> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permpd_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -877,7 +877,7 @@ declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) -define <8 x 
float> @permps_rr_256(<8 x float> %a0, <8 x i32> %idx) { +define <8 x float> @permps_rr_256(<8 x float> %a0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rr_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -914,7 +914,7 @@ ret <8 x float> %res } -define <8 x float> @permps_rm_256(ptr %p0, <8 x i32> %idx) { +define <8 x float> @permps_rm_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rm_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -947,7 +947,7 @@ ret <8 x float> %res } -define <8 x float> @permps_broadcast_256(ptr %p0, <8 x i32> %idx) { +define <8 x float> @permps_broadcast_256(ptr %p0, <8 x i32> %idx) #0 { ; ENABLE-LABEL: permps_broadcast_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -982,7 +982,7 @@ ret <8 x float> %res } -define <8 x float> @permps_maskz_256(<8 x float> %a0, <8 x i32> %idx, ptr %mask) { +define <8 x float> @permps_maskz_256(<8 x float> %a0, <8 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permps_maskz_256: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -1018,7 +1018,7 @@ declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) -define <16 x float> @permps_rr_512(<16 x float> %a0, <16 x i32> %idx) { +define <16 x float> @permps_rr_512(<16 x float> %a0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rr_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -1055,7 +1055,7 @@ ret <16 x float> %res } -define <16 x float> @permps_rm_512(ptr %p0, <16 x i32> %idx) { +define <16 x float> @permps_rm_512(ptr %p0, <16 x i32> %idx) #0 { ; ENABLE-LABEL: permps_rm_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -1088,7 +1088,7 @@ ret <16 x float> %res } -define <16 x float> @permps_broadcast_512(ptr %p0, <16 x i32> %idx) { +define <16 x float> @permps_broadcast_512(ptr %p0, 
<16 x i32> %idx) #0 { ; ENABLE-LABEL: permps_broadcast_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill @@ -1123,7 +1123,7 @@ ret <16 x float> %res } -define <16 x float> @permps_maskz_512(<16 x float> %a0, <16 x i32> %idx, ptr %mask) { +define <16 x float> @permps_maskz_512(<16 x float> %a0, <16 x i32> %idx, ptr %mask) #0 { ; ENABLE-LABEL: permps_maskz_512: ; ENABLE: # %bb.0: ; ENABLE-NEXT: #APP @@ -1159,3 +1159,5 @@ declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/pr47299.ll =================================================================== --- llvm/test/CodeGen/X86/pr47299.ll +++ llvm/test/CodeGen/X86/pr47299.ll @@ -8,7 +8,7 @@ declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) declare <64 x i1> @llvm.get.active.lane.mask.v64i1.i32(i32, i32) -define <7 x i1> @create_mask7(i64 %0) { +define <7 x i1> @create_mask7(i64 %0) #0 { ; CHECK-LABEL: create_mask7: ; CHECK: # %bb.0: ; CHECK-NEXT: mov rax, rdi @@ -53,7 +53,7 @@ ret <7 x i1> %2 } -define <16 x i1> @create_mask16(i64 %0) { +define <16 x i1> @create_mask16(i64 %0) #0 { ; CHECK-LABEL: create_mask16: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastq zmm0, rdi @@ -67,7 +67,7 @@ ret <16 x i1> %2 } -define <32 x i1> @create_mask32(i64 %0) { +define <32 x i1> @create_mask32(i64 %0) #0 { ; CHECK-LABEL: create_mask32: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastq zmm0, rdi @@ -84,7 +84,7 @@ ret <32 x i1> %2 } -define <64 x i1> @create_mask64(i64 %0) { +define <64 x i1> @create_mask64(i64 %0) #0 { ; CHECK-LABEL: create_mask64: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastq zmm0, rdi @@ -109,7 +109,7 @@ ret <64 x i1> %2 } -define <16 x i1> @create_mask16_i32(i32 %0) { +define <16 x i1> @create_mask16_i32(i32 %0) #0 { ; CHECK-LABEL: create_mask16_i32: ; 
CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastd zmm0, edi @@ -121,7 +121,7 @@ ret <16 x i1> %2 } -define <64 x i1> @create_mask64_i32(i32 %0) { +define <64 x i1> @create_mask64_i32(i32 %0) #0 { ; CHECK-LABEL: create_mask64_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastd zmm0, edi @@ -137,3 +137,5 @@ %2 = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i32(i32 0, i32 %0) ret <64 x i1> %2 } + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/pr48727.ll =================================================================== --- llvm/test/CodeGen/X86/pr48727.ll +++ llvm/test/CodeGen/X86/pr48727.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skx | FileCheck %s -define void @PR48727() { +define void @PR48727() #0 { ; CHECK-LABEL: PR48727: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vcvttpd2dqy 0, %xmm0 @@ -43,3 +43,5 @@ !1 = !{!2} !2 = !{!"buffer: {index:1, offset:0, size:20000}", !3} !3 = !{!"XLA global AA domain"} + +attributes #0 = { "min-legal-vector-width"="512" } Index: llvm/test/CodeGen/X86/vector-shuffle-avx512.ll =================================================================== --- llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL,X86,KNL32 ;expand 128 -> 256 include <4 x float> <2 x double> -define <8 x float> @expand(<4 x float> %a) { +define <8 x float> @expand(<4 x float> %a) #0 { ; SKX-LABEL: expand: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -24,7 +24,7 @@ ret <8 x float> %res } -define <8 x float> @expand1(<4 x float> %a ) { +define <8 x float> @expand1(<4 x float> %a ) #0 { ; SKX-LABEL: expand1: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -46,7 +46,7 @@ ret <8 x float> %res } ;Expand 128 -> 256 test <2 x double> -> <4 x double> -define <4 x double> 
@expand2(<2 x double> %a) { +define <4 x double> @expand2(<2 x double> %a) #0 { ; CHECK-LABEL: expand2: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -59,7 +59,7 @@ } ;expand 128 -> 256 include case <4 x i32> <8 x i32> -define <8 x i32> @expand3(<4 x i32> %a ) { +define <8 x i32> @expand3(<4 x i32> %a ) #0 { ; SKX-LABEL: expand3: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -79,7 +79,7 @@ } ;expand 128 -> 256 include case <2 x i64> <4 x i64> -define <4 x i64> @expand4(<2 x i64> %a ) { +define <4 x i64> @expand4(<2 x i64> %a ) #0 { ; SKX-LABEL: expand4: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -100,7 +100,7 @@ } ;Negative test for 128-> 256 -define <8 x float> @expand5(<4 x float> %a ) { +define <8 x float> @expand5(<4 x float> %a ) #0 { ; SKX-LABEL: expand5: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -120,7 +120,7 @@ } ;expand 256 -> 512 include <8 x float> <16 x float> -define <8 x float> @expand6(<4 x float> %a ) { +define <8 x float> @expand6(<4 x float> %a ) #0 { ; CHECK-LABEL: expand6: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -130,7 +130,7 @@ ret <8 x float> %res } -define <16 x float> @expand7(<8 x float> %a) { +define <16 x float> @expand7(<8 x float> %a) #0 { ; SKX-LABEL: expand7: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -150,7 +150,7 @@ ret <16 x float> %res } -define <16 x float> @expand8(<8 x float> %a ) { +define <16 x float> @expand8(<8 x float> %a ) #0 { ; SKX-LABEL: expand8: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -171,7 +171,7 @@ } ;expand 256 -> 512 include <4 x double> <8 x double> -define <8 x double> @expand9(<4 x double> %a) { +define <8 x double> @expand9(<4 x double> %a) #0 { ; SKX-LABEL: expand9: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -191,7 +191,7 @@ ret <8 x double> %res } -define <16 x i32> @expand10(<8 x 
i32> %a ) { +define <16 x i32> @expand10(<8 x i32> %a ) #0 { ; SKX-LABEL: expand10: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -211,7 +211,7 @@ ret <16 x i32> %res } -define <8 x i64> @expand11(<4 x i64> %a) { +define <8 x i64> @expand11(<4 x i64> %a) #0 { ; SKX-LABEL: expand11: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -232,7 +232,7 @@ } ;Negative test for 256-> 512 -define <16 x float> @expand12(<8 x float> %a) { +define <16 x float> @expand12(<8 x float> %a) #0 { ; CHECK-LABEL: expand12: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -245,7 +245,7 @@ ret <16 x float> %res } -define <16 x float> @expand13(<8 x float> %a ) { +define <16 x float> @expand13(<8 x float> %a ) #0 { ; CHECK-LABEL: expand13: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -257,7 +257,7 @@ ; The function checks for a case where the vector is mixed values vector ,and the mask points on zero elements from this vector. -define <8 x float> @expand14(<4 x float> %a) { +define <8 x float> @expand14(<4 x float> %a) #0 { ; SKX-LABEL: expand14: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -280,7 +280,7 @@ } ;Negative test. 
-define <8 x float> @expand15(<4 x float> %a) { +define <8 x float> @expand15(<4 x float> %a) #0 { ; SKX-LABEL: expand15: ; SKX: # %bb.0: ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 @@ -466,7 +466,7 @@ } ; PR34370 -define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) { +define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) #0 { ; SKX64-LABEL: test_masked_permps_v8f32: ; SKX64: # %bb.0: ; SKX64-NEXT: vmovaps (%rdi), %ymm2 @@ -508,7 +508,7 @@ ret <8 x float> %res } -define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) { +define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) #0 { ; X64-LABEL: test_masked_permps_v16f32: ; X64: # %bb.0: ; X64-NEXT: vmovaps (%rdi), %zmm2 @@ -531,7 +531,7 @@ ret <16 x float> %res } -define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) { +define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) #0 { ; SKX64-LABEL: test_demandedelts_pshufb_v32i8_v16i8: ; SKX64: # %bb.0: ; SKX64-NEXT: vpbroadcastd 44(%rdi), %xmm0 @@ -594,7 +594,7 @@ ret void } -define <32 x float> @PR47534(<8 x float> %tmp) { +define <32 x float> @PR47534(<8 x float> %tmp) #0 { ; CHECK-LABEL: PR47534: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 @@ -612,7 +612,7 @@ %union1= type { <16 x float> } @src1 = external dso_local local_unnamed_addr global %union1, align 64 -define void @PR43170(ptr %a0) { +define void @PR43170(ptr %a0) #0 { ; SKX64-LABEL: PR43170: ; SKX64: # %bb.0: # %entry ; SKX64-NEXT: vmovaps src1(%rip), %ymm0 @@ -646,3 +646,5 @@ store <16 x float> %1, ptr %a0, align 64 ret void } + +attributes #0 = { "min-legal-vector-width" = "512" } Index: llvm/test/CodeGen/X86/vector-trunc-usat.ll =================================================================== --- llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -19,7 +19,7 @@ ; Unsigned saturation truncation to vXi32 ; -define <2 
x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) { +define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -119,7 +119,7 @@ ret <2 x i32> %3 } -define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) { +define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i32_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -224,7 +224,7 @@ ret void } -define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) { +define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] @@ -430,7 +430,7 @@ ret <4 x i32> %3 } -define <8 x i32> @trunc_usat_v8i64_v8i32(ptr %p0) { +define <8 x i32> @trunc_usat_v8i64_v8i32(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm2 @@ -717,7 +717,7 @@ ; Unsigned saturation truncation to vXi16 ; -define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) { +define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -840,7 +840,7 @@ ret <2 x i16> %3 } -define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) { +define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i16_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -972,7 +972,7 @@ ret void } -define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) { +define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] @@ -1145,7 +1145,7 @@ 
ret <4 x i16> %3 } -define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) { +define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i16_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] @@ -1325,7 +1325,7 @@ ret void } -define <8 x i16> @trunc_usat_v8i64_v8i16(ptr %p0) { +define <8 x i16> @trunc_usat_v8i64_v8i16(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm4 @@ -1594,7 +1594,7 @@ ret <8 x i16> %3 } -define <4 x i16> @trunc_usat_v4i32_v4i16(<4 x i32> %a0) { +define <4 x i16> @trunc_usat_v4i32_v4i16(<4 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -1676,7 +1676,7 @@ ret <4 x i16> %3 } -define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, ptr%p1) { +define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i16_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -1764,7 +1764,7 @@ ret void } -define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) { +define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v8i32_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] @@ -1878,7 +1878,7 @@ ret <8 x i16> %3 } -define <16 x i16> @trunc_usat_v16i32_v16i16(ptr %p0) { +define <16 x i16> @trunc_usat_v16i32_v16i16(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v16i32_v16i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm5 @@ -2030,7 +2030,7 @@ ; Unsigned saturation truncation to vXi8 ; -define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) { +define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -2132,7 +2132,7 @@ ret 
<2 x i8> %3 } -define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, ptr %p1) { +define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v2i64_v2i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] @@ -2242,7 +2242,7 @@ ret void } -define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) { +define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255] @@ -2419,7 +2419,7 @@ ret <4 x i8> %3 } -define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { +define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i64_v4i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255] @@ -2603,7 +2603,7 @@ ret void } -define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) { +define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -2861,7 +2861,7 @@ ret <8 x i8> %3 } -define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) { +define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v8i64_v8i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -3124,7 +3124,7 @@ ret void } -define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) { +define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v16i64_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa 96(%rdi), %xmm1 @@ -3587,7 +3587,7 @@ ret <16 x i8> %3 } -define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) { +define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -3670,7 +3670,7 @@ ret <4 x i8> %3 } -define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) { +define void @trunc_usat_v4i32_v4i8_store(<4 x i32> 
%a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v4i32_v4i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] @@ -3769,7 +3769,7 @@ ret void } -define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) { +define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) #0 { ; SSE2-LABEL: trunc_usat_v8i32_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255] @@ -3879,7 +3879,7 @@ ret <8 x i8> %3 } -define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, ptr%p1) { +define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v8i32_v8i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255] @@ -3997,7 +3997,7 @@ ret void } -define <16 x i8> @trunc_usat_v16i32_v16i8(ptr %p0) { +define <16 x i8> @trunc_usat_v16i32_v16i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v16i32_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -4137,7 +4137,7 @@ ret <16 x i8> %3 } -define void @trunc_usat_v16i32_v16i8_store(ptr %p0, ptr %p1) { +define void @trunc_usat_v16i32_v16i8_store(ptr %p0, ptr %p1) #0 { ; SSE2-LABEL: trunc_usat_v16i32_v16i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 @@ -4283,7 +4283,7 @@ ret void } -define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) { +define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) #0 { ; SSE2-LABEL: trunc_usat_v8i16_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -4347,7 +4347,7 @@ ret <8 x i8> %3 } -define void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, ptr%p1) { +define void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, ptr%p1) #0 { ; SSE2-LABEL: trunc_usat_v8i16_v8i8_store: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -4418,7 +4418,7 @@ ret void } -define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) { +define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) #0 { ; SSE2-LABEL: trunc_usat_v16i16_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = 
[255,255,255,255,255,255,255,255] @@ -4510,7 +4510,7 @@ ret <16 x i8> %3 } -define <32 x i8> @trunc_usat_v32i16_v32i8(ptr %p0) { +define <32 x i8> @trunc_usat_v32i16_v32i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v32i16_v32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm0 @@ -4640,7 +4640,7 @@ ret <32 x i8> %3 } -define <32 x i8> @trunc_usat_v32i32_v32i8(ptr %p0) { +define <32 x i8> @trunc_usat_v32i32_v32i8(ptr %p0) #0 { ; SSE2-LABEL: trunc_usat_v32i32_v32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm7 @@ -4875,3 +4875,5 @@ %3 = trunc <32 x i32> %2 to <32 x i8> ret <32 x i8> %3 } + +attributes #0 = { "min-legal-vector-width" = "512" }