Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -4684,47 +4684,47 @@ // Permute let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, - llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty, + llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, - llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, + llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, - llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, - llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, - llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_sf_256 : GCCBuiltin<"__builtin_ia32_permvarsf256_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, - llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty, + llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, - llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, + llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_si_256 : GCCBuiltin<"__builtin_ia32_permvarsi256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, - llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; } // Pack ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -7029,9 +7029,9 @@ ret <8 x i64> %res4 } -declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) +declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x i64>, <8 x double>, <8 x double>, i8) -define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { +define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 @@ -7041,9 +7041,9 @@ ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) - %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) - %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) + %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) + %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> zeroinitializer, i8 %x3) + %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) %res3 = fadd <8 x double> %res, %res1 %res4 = fadd <8 x double> %res3, %res2 ret <8 x double> %res4 @@ -7069,9 +7069,9 @@ ret <8 x i64> %res4 } -declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) +declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x i32>, <16 x float>, <16 x float>, i16) -define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { +define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 @@ -7081,9 +7081,9 @@ ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) - %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) - %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) + %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) %res3 = fadd <16 x float> %res, %res1 %res4 = fadd <16 x float> %res3, %res2 ret <16 x float> %res4 Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -9203,9 +9203,9 @@ %res4 = add <4 x i64> %res3, %res2 ret <4 x i64> %res4 } -declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) +declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x i64>, <4 x double>, <4 x double>, i8) -define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { +define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x i64> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] @@ -9215,9 +9215,9 @@ ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xcb] ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) - %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) - %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) + %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x i64> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x i64> %x0, <4 x double> %x1, <4 x double> zeroinitializer, i8 %x3) + %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x i64> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) %res3 = fadd <4 x double> %res, %res1 %res4 = fadd <4 x double> %res3, %res2 ret <4 x double> %res4 @@ -9243,9 +9243,9 @@ ret <4 x i64> %res4 } -declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) +declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x i32>, <8 x float>, <8 x float>, i8) -define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { +define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x i32> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] @@ -9255,9 +9255,9 @@ ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xcb] ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) - %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) - %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) + %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x i32> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x i32> %x0, <8 x float> %x1, <8 x float> zeroinitializer, i8 %x3) + %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x i32> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) %res3 = fadd <8 x float> %res, %res1 %res4 = fadd <8 x float> %res3, %res2 ret <8 x float> %res4