diff --git a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c
--- a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c
+++ b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c
@@ -1,2529 +1,536 @@
-// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -O1 -fno-experimental-new-pass-manager -ffreestanding %s -O1 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
 #include <immintrin.h>
-// CHECK-LABEL: define i64 @test_mm512_reduce_max_epi64(<8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
-// CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <8 x i64> [[TMP5]], [[TMP6]]
-// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]]
-// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64
-// CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <8 x i64> [[TMP13]], [[TMP14]]
-// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]]
-// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32>
-// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64
-// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64
-// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64
-// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64
-// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64
-// CHECK-NEXT: [[TMP23:%.*]] = icmp sgt <8 x i64> [[TMP21]], [[TMP22]]
-// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]]
-// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64
-// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0
-// CHECK-NEXT: ret i64 [[VECEXT_I]]
 long long test_mm512_reduce_max_epi64(__m512i __W){
+ // CHECK-LABEL: test_mm512_reduce_max_epi64
+ // CHECK: entry:
+ // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32>
+ // CHECK-NEXT: %0 = icmp slt <8 x i64> %shuffle.i, %__W
+ // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> %shuffle.i
+ // CHECK-NEXT: %shuffle1.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32>
+ // CHECK-NEXT: %2 = icmp sgt <8 x i64> %1, %shuffle1.i
+ // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle1.i
+ // CHECK-NEXT: %shuffle3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32>
+ // CHECK-NEXT: %4 = icmp sgt <8 x i64> %3, %shuffle3.i
+ // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle3.i
+ // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %5, i32 0
+ // CHECK-NEXT: ret i64 %vecext.i
 return _mm512_reduce_max_epi64(__W);
 }
-// CHECK-LABEL: define i64 @test_mm512_reduce_max_epu64(<8 x i64> %__W) #0 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64
-// CHECK-NEXT:
[[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: 
[[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] unsigned long long test_mm512_reduce_max_epu64(__m512i __W){ + // CHECK-LABEL: test_mm512_reduce_max_epu64 + // CHECK: entry: + // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %0 = icmp ult <8 x i64> %shuffle.i, %__W + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> %shuffle.i + // CHECK-NEXT: %shuffle1.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %2 = icmp ugt <8 x i64> %1, %shuffle1.i + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle1.i + // CHECK-NEXT: %shuffle3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %4 = icmp ugt <8 x i64> %3, %shuffle3.i + // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle3.i + // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %5, i32 0 + // CHECK-NEXT: ret i64 %vecext.i return _mm512_reduce_max_epu64(__W); } -// CHECK-LABEL: define double @test_mm512_reduce_max_pd(<8 x double> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load 
<4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 -// CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT4_I]], <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <2 x i32> -// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP22]], i32 0 -// CHECK-NEXT: ret double [[VECEXT_I]] double test_mm512_reduce_max_pd(__m512d __W){ + // CHECK-LABEL: test_mm512_reduce_max_pd + // CHECK: entry: + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %extract2.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %extract.i, <4 x double> %extract2.i) #3 + // CHECK-NEXT: %extract4.i = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %1 = tail call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %extract4.i, <2 x 
double> %extract5.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> + // CHECK-NEXT: %2 = tail call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %shuffle.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <2 x double> %2, i32 0 + // CHECK-NEXT: ret double %vecext.i return _mm512_reduce_max_pd(__W); } -// CHECK-LABEL: define i64 @test_mm512_reduce_min_epi64(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i64> [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = icmp slt <8 x 
i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp slt <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] long long test_mm512_reduce_min_epi64(__m512i __W){ + // CHECK-LABEL: test_mm512_reduce_min_epi64 + // CHECK: entry: + // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %0 = icmp sgt <8 x i64> %shuffle.i, %__W + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> %shuffle.i + // CHECK-NEXT: %shuffle1.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %2 = icmp slt <8 x i64> %1, %shuffle1.i + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle1.i + // CHECK-NEXT: %shuffle3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %4 = icmp slt <8 x i64> %3, %shuffle3.i + // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle3.i + // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %5, i32 0 + // CHECK-NEXT: ret i64 %vecext.i return _mm512_reduce_min_epi64(__W); } -// CHECK-LABEL: define i64 @test_mm512_reduce_min_epu64(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// 
CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = icmp ult <8 x i64> [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = icmp ult <8 x i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp ult <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] unsigned long long 
test_mm512_reduce_min_epu64(__m512i __W){ + // CHECK-LABEL: test_mm512_reduce_min_epu64 + // CHECK: entry: + // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %0 = icmp ugt <8 x i64> %shuffle.i, %__W + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> %shuffle.i + // CHECK-NEXT: %shuffle1.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %2 = icmp ult <8 x i64> %1, %shuffle1.i + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle1.i + // CHECK-NEXT: %shuffle3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %4 = icmp ult <8 x i64> %3, %shuffle3.i + // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle3.i + // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %5, i32 0 + // CHECK-NEXT: ret i64 %vecext.i return _mm512_reduce_min_epu64(__W); } -// CHECK-LABEL: define double @test_mm512_reduce_min_pd(<8 x double> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 -// CHECK-NEXT: 
store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT4_I]], <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <2 x i32> -// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP22]], i32 0 -// CHECK-NEXT: ret double [[VECEXT_I]] double test_mm512_reduce_min_pd(__m512d __W){ + // CHECK-LABEL: test_mm512_reduce_min_pd + // CHECK: entry: + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %extract2.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %extract.i, <4 x double> %extract2.i) #3 + // CHECK-NEXT: %extract4.i = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %1 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %extract4.i, <2 x double> %extract5.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> + // CHECK-NEXT: %2 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> 
%1, <2 x double> %shuffle.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <2 x double> %2, i32 0 + // CHECK-NEXT: ret double %vecext.i return _mm512_reduce_min_pd(__W); } -// CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i64 -9223372036854775808, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 
x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> -// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] -// CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i64> [[TMP22]], [[TMP23]] -// CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] -// CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP32:%.*]] = icmp sgt <8 x i64> [[TMP30]], [[TMP31]] -// CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] -// CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 -// 
CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP40:%.*]] = icmp sgt <8 x i64> [[TMP38]], [[TMP39]] -// CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] -// CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_max_epi64 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> + // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %2 = icmp sgt <8 x i64> %1, %shuffle.i + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle.i + // CHECK-NEXT: %shuffle3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %4 = icmp sgt <8 x i64> %3, %shuffle3.i + // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle3.i + // CHECK-NEXT: %shuffle5.i = shufflevector <8 x i64> %5, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %6 = icmp sgt <8 x i64> %5, %shuffle5.i + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i64> %5, <8 x i64> %shuffle5.i + // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %7, i32 0 + // CHECK-NEXT: ret i64 %vecext.i return _mm512_mask_reduce_max_epi64(__M, __W); } -// CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: 
[[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i8 [[TMP2]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> -// CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] -// CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] -// CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE2_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] -// CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] -// CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE4_I:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <8 x i32> -// CHECK-NEXT: 
store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP27]], <8 x i64>* [[__A_ADDR_I6_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <8 x i64> [[TMP29]], [[TMP30]] -// CHECK-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP29]], <8 x i64> [[TMP30]] -// CHECK-NEXT: store <8 x i64> [[TMP32]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP33]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_max_epu64 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> zeroinitializer + // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %2 = icmp ugt <8 x i64> %1, %shuffle.i + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle.i + // CHECK-NEXT: %shuffle2.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %4 = icmp ugt <8 x i64> %3, %shuffle2.i + // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle2.i + // CHECK-NEXT: %shuffle4.i = shufflevector <8 x i64> %5, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %6 = icmp ugt <8 x i64> %5, %shuffle4.i + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i64> %5, <8 x i64> %shuffle4.i + // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %7, i32 0 + // CHECK-NEXT: ret i64 %vecext.i return _mm512_mask_reduce_max_epu64(__M, __W); } -// CHECK-LABEL: define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: 
[[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store double 0xFFF0000000000000, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> -// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] -// CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], 
align 32 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 -// CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT6_I]], <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP32]], <2 x double> [[TMP33]], <2 x i32> -// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = 
extractelement <2 x double> [[TMP39]], i32 0 -// CHECK-NEXT: ret double [[VECEXT_I]] double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){ + // CHECK-LABEL: test_mm512_mask_reduce_max_pd + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x double> %__W, <8 x double> + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %1, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %extract4.i = shufflevector <8 x double> %1, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %2 = tail call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %extract.i, <4 x double> %extract4.i) #3 + // CHECK-NEXT: %extract6.i = shufflevector <4 x double> %2, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %extract7.i = shufflevector <4 x double> %2, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %extract6.i, <2 x double> %extract7.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> + // CHECK-NEXT: %4 = tail call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %3, <2 x double> %shuffle.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <2 x double> %4, i32 0 + // CHECK-NEXT: ret double %vecext.i return _mm512_mask_reduce_max_pd(__M, __W); } -// CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i64 9223372036854775807, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x 
i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> -// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] -// CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = icmp slt <8 x i64> [[TMP22]], [[TMP23]] -// CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] -// CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> -// CHECK-NEXT: store <8 x i64> 
[[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP32:%.*]] = icmp slt <8 x i64> [[TMP30]], [[TMP31]] -// CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] -// CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP40:%.*]] = icmp slt <8 x i64> [[TMP38]], [[TMP39]] -// CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] -// CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_min_epi64 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> + // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %2 = icmp slt <8 x i64> %1, %shuffle.i + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle.i + // CHECK-NEXT: %shuffle3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %4 = icmp slt <8 x i64> %3, %shuffle3.i + // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle3.i + // CHECK-NEXT: %shuffle5.i = shufflevector <8 x i64> %5, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %6 = icmp slt <8 x i64> %5, %shuffle5.i + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i64> %5, <8 x i64> %shuffle5.i + // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %7, i32 0 + // CHECK-NEXT: ret i64 %vecext.i return _mm512_mask_reduce_min_epi64(__M, __W); } -// CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: 
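+// The masked reductions in this group substitute the operation's identity for the masked-off
+// lanes via a select against a constant splat before folding: -infinity (0xFFF0000000000000)
+// for _mm512_mask_reduce_max_pd, LLONG_MAX (9223372036854775807) for
+// _mm512_mask_reduce_min_epi64, all-ones (UINT64_MAX) for _mm512_mask_reduce_min_epu64, and
+// +infinity (0x7FF0000000000000) for _mm512_mask_reduce_min_pd. The remaining eight lanes are
+// then folded in lg2(8) = 3 rounds (shuffle + icmp/select for the integer cases, shuffle +
+// the llvm.x86.*.max/min.pd calls for the double cases), which is what the CHECK lines verify.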
[[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i64 -1, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], 
align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> -// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] -// CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = icmp ult <8 x i64> [[TMP22]], [[TMP23]] -// CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] -// CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 -// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP32:%.*]] = icmp ult <8 x i64> [[TMP30]], [[TMP31]] -// CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] -// CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32> -// CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 -// CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP40:%.*]] = icmp ult <8 x i64> [[TMP38]], [[TMP39]] -// CHECK-NEXT: [[TMP41:%.*]] = 
select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] -// CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0 -// CHECK-NEXT: ret i64 [[VECEXT_I]] long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_min_epu64 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__W, <8 x i64> + // CHECK-NEXT: %shuffle.i = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %2 = icmp ult <8 x i64> %1, %shuffle.i + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %shuffle.i + // CHECK-NEXT: %shuffle3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %4 = icmp ult <8 x i64> %3, %shuffle3.i + // CHECK-NEXT: %5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %shuffle3.i + // CHECK-NEXT: %shuffle5.i = shufflevector <8 x i64> %5, <8 x i64> undef, <8 x i32> + // CHECK-NEXT: %6 = icmp ult <8 x i64> %5, %shuffle5.i + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i64> %5, <8 x i64> %shuffle5.i + // CHECK-NEXT: %vecext.i = extractelement <8 x i64> %7, i32 0 + // CHECK-NEXT: ret i64 %vecext.i return _mm512_mask_reduce_min_epu64(__M, __W); } -// CHECK-LABEL: define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store double 0x7FF0000000000000, double* 
[[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> -// CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] -// CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP22:%.*]] = 
load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 -// CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT6_I]], <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> undef, <2 x i32> -// CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP32]], <2 x double> [[TMP33]], <2 x i32> -// CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 -// CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP39]], i32 0 -// CHECK-NEXT: ret double [[VECEXT_I]] double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){ + // CHECK-LABEL: test_mm512_mask_reduce_min_pd + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // CHECK-NEXT: %1 = select <8 x i1> %0, <8 x double> %__W, <8 x double> + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %1, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %extract4.i = shufflevector <8 x double> %1, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %2 = tail call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %extract.i, <4 x double> %extract4.i) #3 + // CHECK-NEXT: 
%extract6.i = shufflevector <4 x double> %2, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %extract7.i = shufflevector <4 x double> %2, <4 x double> undef, <2 x i32> + // CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %extract6.i, <2 x double> %extract7.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> + // CHECK-NEXT: %4 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %3, <2 x double> %shuffle.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <2 x double> %4, i32 0 + // CHECK-NEXT: ret double %vecext.i return _mm512_mask_reduce_min_pd(__M, __W); } -// CHECK-LABEL: define i32 @test_mm512_reduce_max_epi32(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> -// CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <8 x i32> [[TMP6]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x 
i32> [[TMP8]] -// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> -// CHECK-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] -// CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> -// CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> -// CHECK-NEXT: [[TMP34:%.*]] = icmp sgt <4 x i32> [[TMP31]], [[TMP33]] -// CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] -// CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> 
[[TMP38]], <4 x i32> [[TMP40]], <4 x i32> -// CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> -// CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> -// CHECK-NEXT: [[TMP48:%.*]] = icmp sgt <4 x i32> [[TMP45]], [[TMP47]] -// CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] -// CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] int test_mm512_reduce_max_epi32(__m512i __W){ + // CHECK-LABEL: test_mm512_reduce_max_epi32 + // CHECK: entry: + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract2.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %0 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %1 = bitcast <4 x i64> %extract2.i to <8 x i32> + // CHECK-NEXT: %2 = icmp sgt <8 x i32> %0, %1 + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // CHECK-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // CHECK-NEXT: %extract4.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %5 = bitcast <2 x i64> %extract4.i to <4 x i32> + // CHECK-NEXT: %6 = bitcast <2 x i64> %extract5.i to <4 x i32> + // CHECK-NEXT: %7 = icmp sgt <4 x i32> %5, %6 + // CHECK-NEXT: %8 = select <4 x i1> %7, <4 x i32> %5, <4 x i32> %6 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %9 = icmp sgt <4 x i32> %8, %shuffle.i + // CHECK-NEXT: %10 = select <4 x i1> %9, <4 x i32> %8, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle8.i = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %11 = icmp sgt <4 x i32> %10, %shuffle8.i + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %10, <4 x i32> %shuffle8.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %12, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_reduce_max_epi32(__W); } -// CHECK-LABEL: define i32 @test_mm512_reduce_max_epu32(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: 
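+// The 16-lane i32 reductions narrow in four rounds: extract the two 256-bit halves and combine
+// them as <8 x i32>, extract the two 128-bit halves and combine as <4 x i32>, then finish with
+// two in-register shuffles. The signed and unsigned variants differ only in the predicate the
+// CHECK lines expect (icmp sgt/slt vs. icmp ugt/ult).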
[[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> -// CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <8 x i32> [[TMP6]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] -// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* 
[[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> -// CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <4 x i32> [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] -// CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> -// CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> -// CHECK-NEXT: [[TMP34:%.*]] = icmp ugt <4 x i32> [[TMP31]], [[TMP33]] -// CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] -// CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> -// CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> -// CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> -// CHECK-NEXT: [[TMP48:%.*]] = icmp ugt <4 x i32> [[TMP45]], [[TMP47]] -// CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] -// CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], 
align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] unsigned int test_mm512_reduce_max_epu32(__m512i __W){ + // CHECK-LABEL: test_mm512_reduce_max_epu32 + // CHECK: entry: + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract2.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %0 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %1 = bitcast <4 x i64> %extract2.i to <8 x i32> + // CHECK-NEXT: %2 = icmp ugt <8 x i32> %0, %1 + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // CHECK-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // CHECK-NEXT: %extract4.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %5 = bitcast <2 x i64> %extract4.i to <4 x i32> + // CHECK-NEXT: %6 = bitcast <2 x i64> %extract5.i to <4 x i32> + // CHECK-NEXT: %7 = icmp ugt <4 x i32> %5, %6 + // CHECK-NEXT: %8 = select <4 x i1> %7, <4 x i32> %5, <4 x i32> %6 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %9 = icmp ugt <4 x i32> %8, %shuffle.i + // CHECK-NEXT: %10 = select <4 x i1> %9, <4 x i32> %8, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle8.i = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %11 = icmp ugt <4 x i32> %10, %shuffle8.i + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %10, <4 x i32> %shuffle8.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %12, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_reduce_max_epu32(__W); } -// CHECK-LABEL: define float @test_mm512_reduce_max_ps(<16 x float> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x float> [[TMP1]] to <8 x 
double> -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP3]], <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP4]] to <8 x double> -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[EXTRACT2_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 -// CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT4_I]], <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP21:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* 
[[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP27:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> [[TMP27]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP33]], i32 0 -// CHECK-NEXT: ret float [[VECEXT_I]] float test_mm512_reduce_max_ps(__m512 __W){ + // CHECK-LABEL: test_mm512_reduce_max_ps + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast <16 x float> %__W to <8 x double> + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %0, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %1 = bitcast <4 x double> %extract.i to <8 x float> + // CHECK-NEXT: %extract2.i = shufflevector <8 x double> %0, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %2 = bitcast <4 x double> %extract2.i to <8 x float> + // CHECK-NEXT: %3 = tail call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %1, <8 x float> %2) #3 + // CHECK-NEXT: %extract4.i = shufflevector <8 x float> %3, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <8 x float> %3, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %4 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %extract4.i, <4 x float> %extract5.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %4, <4 x float> %shuffle.i) #3 + // CHECK-NEXT: %shuffle8.i = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %6 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %5, <4 x float> %shuffle8.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <4 x float> %6, i32 0 + // CHECK-NEXT: ret float %vecext.i return _mm512_reduce_max_ps(__W); } -// CHECK-LABEL: define i32 @test_mm512_reduce_min_epi32(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = 
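+// _mm512_reduce_max_ps follows the same narrowing scheme using the vector max intrinsics
+// (llvm.x86.avx.max.ps.256 once, then llvm.x86.sse.max.ps three times). The initial bitcast of
+// the <16 x float> argument to <8 x double> appears to come from the header taking the 256-bit
+// half through an f64x4 extract before casting back to <8 x float>.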
alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[TMP6]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] -// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 
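+// For reference, _mm512_reduce_min_epi32 (checked below) is equivalent to a scalar fold over
+// the sixteen signed 32-bit lanes; an illustrative sketch only, kept as a comment and assuming
+// the __v16si vector typedef from the AVX-512 headers:
+//   int r = ((__v16si)__W)[0];
+//   for (int i = 1; i < 16; ++i)
+//     if (((__v16si)__W)[i] < r)
+//       r = ((__v16si)__W)[i];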
-// CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> -// CHECK-NEXT: [[TMP20:%.*]] = icmp slt <4 x i32> [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] -// CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> -// CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> -// CHECK-NEXT: [[TMP34:%.*]] = icmp slt <4 x i32> [[TMP31]], [[TMP33]] -// CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] -// CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> -// CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> -// CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> -// CHECK-NEXT: [[TMP48:%.*]] = icmp slt <4 x i32> [[TMP45]], [[TMP47]] -// CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] -// CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: 
[[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] int test_mm512_reduce_min_epi32(__m512i __W){ + // CHECK-LABEL: test_mm512_reduce_min_epi32 + // CHECK: entry: + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract2.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %0 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %1 = bitcast <4 x i64> %extract2.i to <8 x i32> + // CHECK-NEXT: %2 = icmp slt <8 x i32> %0, %1 + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // CHECK-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // CHECK-NEXT: %extract4.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %5 = bitcast <2 x i64> %extract4.i to <4 x i32> + // CHECK-NEXT: %6 = bitcast <2 x i64> %extract5.i to <4 x i32> + // CHECK-NEXT: %7 = icmp slt <4 x i32> %5, %6 + // CHECK-NEXT: %8 = select <4 x i1> %7, <4 x i32> %5, <4 x i32> %6 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %9 = icmp slt <4 x i32> %8, %shuffle.i + // CHECK-NEXT: %10 = select <4 x i1> %9, <4 x i32> %8, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle8.i = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %11 = icmp slt <4 x i32> %10, %shuffle8.i + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %10, <4 x i32> %shuffle8.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %12, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_reduce_min_epi32(__W); } -// CHECK-LABEL: define i32 @test_mm512_reduce_min_epu32(<8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 
32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> -// CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x i32> [[TMP6]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] -// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> -// CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] -// CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x 
i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> -// CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> -// CHECK-NEXT: [[TMP34:%.*]] = icmp ult <4 x i32> [[TMP31]], [[TMP33]] -// CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] -// CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> -// CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> -// CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> -// CHECK-NEXT: [[TMP48:%.*]] = icmp ult <4 x i32> [[TMP45]], [[TMP47]] -// CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] -// CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] unsigned int test_mm512_reduce_min_epu32(__m512i __W){ + // CHECK-LABEL: test_mm512_reduce_min_epu32 + // CHECK: entry: + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract2.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %0 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %1 = bitcast <4 x i64> %extract2.i to <8 x i32> + // CHECK-NEXT: %2 = icmp ult <8 x i32> %0, %1 + // CHECK-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // CHECK-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // CHECK-NEXT: %extract4.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %5 = bitcast <2 x i64> %extract4.i to <4 x i32> + // CHECK-NEXT: %6 = bitcast <2 x i64> %extract5.i to <4 x i32> + // CHECK-NEXT: %7 = icmp ult <4 x i32> %5, %6 + // CHECK-NEXT: %8 = select <4 x i1> %7, <4 x i32> %5, <4 x i32> %6 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %9 = icmp ult <4 x i32> %8, 
%shuffle.i + // CHECK-NEXT: %10 = select <4 x i1> %9, <4 x i32> %8, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle8.i = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %11 = icmp ult <4 x i32> %10, %shuffle8.i + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %10, <4 x i32> %shuffle8.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %12, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_reduce_min_epu32(__W); } -// CHECK-LABEL: define float @test_mm512_reduce_min_ps(<16 x float> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x float> [[TMP1]] to <8 x double> -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP3]], <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP4]] to <8 x double> -// CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[EXTRACT2_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> 
@llvm.x86.avx.min.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 -// CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT4_I]], <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP21:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP27:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> [[TMP27]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* 
[[__T10_I]], align 16 -// CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP33]], i32 0 -// CHECK-NEXT: ret float [[VECEXT_I]] float test_mm512_reduce_min_ps(__m512 __W){ + // CHECK-LABEL: test_mm512_reduce_min_ps + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast <16 x float> %__W to <8 x double> + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %0, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %1 = bitcast <4 x double> %extract.i to <8 x float> + // CHECK-NEXT: %extract2.i = shufflevector <8 x double> %0, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %2 = bitcast <4 x double> %extract2.i to <8 x float> + // CHECK-NEXT: %3 = tail call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %1, <8 x float> %2) #3 + // CHECK-NEXT: %extract4.i = shufflevector <8 x float> %3, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %extract5.i = shufflevector <8 x float> %3, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %4 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %extract4.i, <4 x float> %extract5.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %4, <4 x float> %shuffle.i) #3 + // CHECK-NEXT: %shuffle8.i = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %6 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %5, <4 x float> %shuffle8.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <4 x float> %6, i32 0 + // CHECK-NEXT: ret float %vecext.i return _mm512_reduce_min_ps(__W); } -// CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 
-// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i32 -2147483648, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> -// CHECK-NEXT: [[TMP20:%.*]] = load 
i16, i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A2_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A2_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> -// CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] -// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64> -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> -// CHECK-NEXT: [[TMP38:%.*]] = icmp sgt <8 x i32> [[TMP35]], [[TMP37]] -// CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x 
i32> -// CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32> -// CHECK-NEXT: [[TMP49:%.*]] = icmp sgt <4 x i32> [[TMP46]], [[TMP48]] -// CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]] -// CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32> -// CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32> -// CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32> -// CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32> -// CHECK-NEXT: [[TMP63:%.*]] = icmp sgt <4 x i32> [[TMP60]], [[TMP62]] -// CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]] -// CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32> -// CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32> -// CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32> -// CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32> -// CHECK-NEXT: [[TMP77:%.*]] = icmp sgt <4 x i32> [[TMP74]], [[TMP76]] -// CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] -// CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 -// 
CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_max_epi32 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // CHECK-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // CHECK-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> + // CHECK-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract4.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %4 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %5 = bitcast <4 x i64> %extract4.i to <8 x i32> + // CHECK-NEXT: %6 = icmp sgt <8 x i32> %4, %5 + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i32> %4, <8 x i32> %5 + // CHECK-NEXT: %8 = bitcast <8 x i32> %7 to <4 x i64> + // CHECK-NEXT: %extract6.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract7.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %9 = bitcast <2 x i64> %extract6.i to <4 x i32> + // CHECK-NEXT: %10 = bitcast <2 x i64> %extract7.i to <4 x i32> + // CHECK-NEXT: %11 = icmp sgt <4 x i32> %9, %10 + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %9, <4 x i32> %10 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %12, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %13 = icmp sgt <4 x i32> %12, %shuffle.i + // CHECK-NEXT: %14 = select <4 x i1> %13, <4 x i32> %12, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle10.i = shufflevector <4 x i32> %14, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %15 = icmp sgt <4 x i32> %14, %shuffle10.i + // CHECK-NEXT: %16 = select <4 x i1> %15, <4 x i32> %14, <4 x i32> %shuffle10.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %16, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_mask_reduce_max_epi32(__M, __W); } -// CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: 
[[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i16 [[TMP2]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP5]] to <16 x i32> -// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP7]] to <16 x i32> -// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> -// CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP6]], <16 x i32> [[TMP8]] -// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64> -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT3_I:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT3_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP15:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP14]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP15]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP16:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i64> [[TMP16]] to <8 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i64> [[TMP18]] to <8 x i32> -// CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <8 x i32> [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP21:%.*]] = select <8 x i1> [[TMP20]], <8 x i32> [[TMP17]], <8 x i32> [[TMP19]] -// CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i32> [[TMP21]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP22]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = 
load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP24]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP26:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[__V1_ADDR_I13_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[__V2_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP27:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP28:%.*]] = bitcast <2 x i64> [[TMP27]] to <4 x i32> -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = bitcast <2 x i64> [[TMP29]] to <4 x i32> -// CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <4 x i32> [[TMP28]], [[TMP30]] -// CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP31]], <4 x i32> [[TMP28]], <4 x i32> [[TMP30]] -// CHECK-NEXT: [[TMP33:%.*]] = bitcast <4 x i32> [[TMP32]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP34:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP35:%.*]] = bitcast <2 x i64> [[TMP34]] to <4 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = bitcast <2 x i64> [[TMP36]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP37]], <4 x i32> -// CHECK-NEXT: [[TMP38:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP38]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP39]], <2 x i64>* [[__V1_ADDR_I11_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP40]], <2 x i64>* [[__V2_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP41:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = bitcast <2 x i64> [[TMP41]] to <4 x i32> -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = bitcast <2 x i64> [[TMP43]] to <4 x i32> -// CHECK-NEXT: [[TMP45:%.*]] = icmp ugt <4 x i32> [[TMP42]], [[TMP44]] -// CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP45]], <4 x i32> [[TMP42]], <4 x i32> [[TMP44]] -// CHECK-NEXT: [[TMP47:%.*]] = bitcast <4 x i32> [[TMP46]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP47]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP49:%.*]] = bitcast <2 x i64> [[TMP48]] to <4 x i32> -// CHECK-NEXT: [[TMP50:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP51:%.*]] = bitcast <2 x i64> [[TMP50]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x i32> [[TMP49]], <4 x i32> [[TMP51]], <4 x i32> -// CHECK-NEXT: [[TMP52:%.*]] = bitcast <4 x i32> [[SHUFFLE9_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP52]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP53:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP53]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: 
store <2 x i64> [[TMP54]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP55:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP56:%.*]] = bitcast <2 x i64> [[TMP55]] to <4 x i32> -// CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP58:%.*]] = bitcast <2 x i64> [[TMP57]] to <4 x i32> -// CHECK-NEXT: [[TMP59:%.*]] = icmp ugt <4 x i32> [[TMP56]], [[TMP58]] -// CHECK-NEXT: [[TMP60:%.*]] = select <4 x i1> [[TMP59]], <4 x i32> [[TMP56]], <4 x i32> [[TMP58]] -// CHECK-NEXT: [[TMP61:%.*]] = bitcast <4 x i32> [[TMP60]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP62:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP62]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_max_epu32 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // CHECK-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // CHECK-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // CHECK-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract3.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %4 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %5 = bitcast <4 x i64> %extract3.i to <8 x i32> + // CHECK-NEXT: %6 = icmp ugt <8 x i32> %4, %5 + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i32> %4, <8 x i32> %5 + // CHECK-NEXT: %8 = bitcast <8 x i32> %7 to <4 x i64> + // CHECK-NEXT: %extract5.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract6.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %9 = bitcast <2 x i64> %extract5.i to <4 x i32> + // CHECK-NEXT: %10 = bitcast <2 x i64> %extract6.i to <4 x i32> + // CHECK-NEXT: %11 = icmp ugt <4 x i32> %9, %10 + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %9, <4 x i32> %10 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %12, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %13 = icmp ugt <4 x i32> %12, %shuffle.i + // CHECK-NEXT: %14 = select <4 x i1> %13, <4 x i32> %12, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle9.i = shufflevector <4 x i32> %14, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %15 = icmp ugt <4 x i32> %14, %shuffle9.i + // CHECK-NEXT: %16 = select <4 x i1> %15, <4 x i32> %14, <4 x i32> %shuffle9.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %16, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_mask_reduce_max_epu32(__M, __W); } -// CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// 
CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store float 0xFFF0000000000000, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], 
align 4 -// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1> -// CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] -// CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x float> [[TMP26]] to <8 x double> -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP27]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP28]], <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x float> [[TMP29]] to <8 x double> -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP30]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x double> [[EXTRACT4_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* 
[[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 -// CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <8 x float> [[TMP37]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT6_I]], <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <8 x float> [[TMP38]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP44]], <4 x float> [[TMP45]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP52:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x float> [[TMP51]], <4 x float> [[TMP52]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE10_I]], <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP55]], <4 
x float> [[TMP56]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP58]], i32 0 -// CHECK-NEXT: ret float [[VECEXT_I]] float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){ + // CHECK-LABEL: test_mm512_mask_reduce_max_ps + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // CHECK-NEXT: %1 = select <16 x i1> %0, <16 x float> %__W, <16 x float> + // CHECK-NEXT: %2 = bitcast <16 x float> %1 to <8 x double> + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %2, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %3 = bitcast <4 x double> %extract.i to <8 x float> + // CHECK-NEXT: %extract4.i = shufflevector <8 x double> %2, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %4 = bitcast <4 x double> %extract4.i to <8 x float> + // CHECK-NEXT: %5 = tail call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %3, <8 x float> %4) #3 + // CHECK-NEXT: %extract6.i = shufflevector <8 x float> %5, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %extract7.i = shufflevector <8 x float> %5, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %6 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %extract6.i, <4 x float> %extract7.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %7 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> %shuffle.i) #3 + // CHECK-NEXT: %shuffle10.i = shufflevector <4 x float> %7, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %8 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %7, <4 x float> %shuffle10.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <4 x float> %8, i32 0 + // CHECK-NEXT: ret float %vecext.i return _mm512_mask_reduce_max_ps(__M, __W); } -// CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 
-// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i32 2147483647, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* 
[[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> -// CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> -// CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] -// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64> -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> -// CHECK-NEXT: [[TMP38:%.*]] = icmp slt <8 x i32> [[TMP35]], [[TMP37]] -// CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 
16 -// CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x i32> -// CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32> -// CHECK-NEXT: [[TMP49:%.*]] = icmp slt <4 x i32> [[TMP46]], [[TMP48]] -// CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]] -// CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32> -// CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32> -// CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32> -// CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32> -// CHECK-NEXT: [[TMP63:%.*]] = icmp slt <4 x i32> [[TMP60]], [[TMP62]] -// CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]] -// CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32> -// CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32> -// CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32> -// CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32> -// CHECK-NEXT: [[TMP77:%.*]] = icmp slt <4 x i32> [[TMP74]], [[TMP76]] -// 
CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] -// CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_min_epi32 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // CHECK-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // CHECK-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> + // CHECK-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract4.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %4 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %5 = bitcast <4 x i64> %extract4.i to <8 x i32> + // CHECK-NEXT: %6 = icmp slt <8 x i32> %4, %5 + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i32> %4, <8 x i32> %5 + // CHECK-NEXT: %8 = bitcast <8 x i32> %7 to <4 x i64> + // CHECK-NEXT: %extract6.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract7.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %9 = bitcast <2 x i64> %extract6.i to <4 x i32> + // CHECK-NEXT: %10 = bitcast <2 x i64> %extract7.i to <4 x i32> + // CHECK-NEXT: %11 = icmp slt <4 x i32> %9, %10 + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %9, <4 x i32> %10 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %12, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %13 = icmp slt <4 x i32> %12, %shuffle.i + // CHECK-NEXT: %14 = select <4 x i1> %13, <4 x i32> %12, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle10.i = shufflevector <4 x i32> %14, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %15 = icmp slt <4 x i32> %14, %shuffle10.i + // CHECK-NEXT: %16 = select <4 x i1> %15, <4 x i32> %14, <4 x i32> %shuffle10.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %16, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_mask_reduce_min_epi32(__M, __W); } -// CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// 
CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store i32 -1, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13 -// CHECK-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> -// CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> -// CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> -// CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] -// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64> -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32> -// CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> -// CHECK-NEXT: [[TMP38:%.*]] = icmp ult <8 x i32> [[TMP35]], [[TMP37]] -// CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] -// CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 -// 
CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32> -// CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x i32> -// CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32> -// CHECK-NEXT: [[TMP49:%.*]] = icmp ult <4 x i32> [[TMP46]], [[TMP48]] -// CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]] -// CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32> -// CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32> -// CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32> -// CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32> -// CHECK-NEXT: [[TMP63:%.*]] = icmp ult <4 x i32> [[TMP60]], [[TMP62]] -// CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]] -// CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32> -// CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32> -// CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64> -// CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* 
[[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32> -// CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32> -// CHECK-NEXT: [[TMP77:%.*]] = icmp ult <4 x i32> [[TMP74]], [[TMP76]] -// CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] -// CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 -// CHECK-NEXT: ret i32 [[VECEXT_I]] unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){ + // CHECK-LABEL: test_mm512_mask_reduce_min_epu32 + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // CHECK-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // CHECK-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> + // CHECK-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // CHECK-NEXT: %extract.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %extract4.i = shufflevector <8 x i64> %3, <8 x i64> undef, <4 x i32> + // CHECK-NEXT: %4 = bitcast <4 x i64> %extract.i to <8 x i32> + // CHECK-NEXT: %5 = bitcast <4 x i64> %extract4.i to <8 x i32> + // CHECK-NEXT: %6 = icmp ult <8 x i32> %4, %5 + // CHECK-NEXT: %7 = select <8 x i1> %6, <8 x i32> %4, <8 x i32> %5 + // CHECK-NEXT: %8 = bitcast <8 x i32> %7 to <4 x i64> + // CHECK-NEXT: %extract6.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %extract7.i = shufflevector <4 x i64> %8, <4 x i64> undef, <2 x i32> + // CHECK-NEXT: %9 = bitcast <2 x i64> %extract6.i to <4 x i32> + // CHECK-NEXT: %10 = bitcast <2 x i64> %extract7.i to <4 x i32> + // CHECK-NEXT: %11 = icmp ult <4 x i32> %9, %10 + // CHECK-NEXT: %12 = select <4 x i1> %11, <4 x i32> %9, <4 x i32> %10 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x i32> %12, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %13 = icmp ult <4 x i32> %12, %shuffle.i + // CHECK-NEXT: %14 = select <4 x i1> %13, <4 x i32> %12, <4 x i32> %shuffle.i + // CHECK-NEXT: %shuffle10.i = shufflevector <4 x i32> %14, <4 x i32> undef, <4 x i32> + // CHECK-NEXT: %15 = icmp ult <4 x i32> %14, %shuffle10.i + // CHECK-NEXT: %16 = select <4 x i1> %15, <4 x i32> %14, <4 x i32> %shuffle10.i + // CHECK-NEXT: %vecext.i = extractelement <4 x i32> %16, i32 0 + // CHECK-NEXT: ret i32 %vecext.i return _mm512_mask_reduce_min_epu32(__M, __W); } -// CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) #0 { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, 
align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 -// CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT10_I_I:%.*]] = 
insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 -// CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1> -// CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] -// CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x float> [[TMP26]] to <8 x double> -// CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP27]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP28]], <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x float> [[TMP29]] to <8 x double> -// CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP30]], <8 x double> undef, <4 x i32> -// CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x double> [[EXTRACT4_I]] to <8 x float> -// CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 -// CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: 
[[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 -// CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <8 x float> [[TMP37]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT6_I]], <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 -// CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <8 x float> [[TMP38]], <8 x float> undef, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 -// CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP45:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP44]], <4 x float> [[TMP45]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 -// CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 -// CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP52:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x float> [[TMP51]], <4 x float> [[TMP52]], <4 x i32> -// CHECK-NEXT: store <4 x float> [[SHUFFLE10_I]], <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 -// CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2 -// CHECK-NEXT: store 
<4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 -// CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP58]], i32 0 -// CHECK-NEXT: ret float [[VECEXT_I]] float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){ + // CHECK-LABEL: test_mm512_mask_reduce_min_ps + // CHECK: entry: + // CHECK-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // CHECK-NEXT: %1 = select <16 x i1> %0, <16 x float> %__W, <16 x float> + // CHECK-NEXT: %2 = bitcast <16 x float> %1 to <8 x double> + // CHECK-NEXT: %extract.i = shufflevector <8 x double> %2, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %3 = bitcast <4 x double> %extract.i to <8 x float> + // CHECK-NEXT: %extract4.i = shufflevector <8 x double> %2, <8 x double> undef, <4 x i32> + // CHECK-NEXT: %4 = bitcast <4 x double> %extract4.i to <8 x float> + // CHECK-NEXT: %5 = tail call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %3, <8 x float> %4) #3 + // CHECK-NEXT: %extract6.i = shufflevector <8 x float> %5, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %extract7.i = shufflevector <8 x float> %5, <8 x float> undef, <4 x i32> + // CHECK-NEXT: %6 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %extract6.i, <4 x float> %extract7.i) #3 + // CHECK-NEXT: %shuffle.i = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %7 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %6, <4 x float> %shuffle.i) #3 + // CHECK-NEXT: %shuffle10.i = shufflevector <4 x float> %7, <4 x float> undef, <4 x i32> + // CHECK-NEXT: %8 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> %shuffle10.i) #3 + // CHECK-NEXT: %vecext.i = extractelement <4 x float> %8, i32 0 + // CHECK-NEXT: ret float %vecext.i return _mm512_mask_reduce_min_ps(__M, __W); } diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -1,10133 +1,23288 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -O1 -fno-experimental-new-pass-manager -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=APPLE +// RUN: %clang_cc1 -O1 -fno-experimental-new-pass-manager -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=X64 + +// RUN: %clang_cc1 -O1 -fexperimental-new-pass-manager -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=APPLE +// RUN: %clang_cc1 -O1 -fexperimental-new-pass-manager -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=X64 #include __m512d test_mm512_sqrt_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_sqrt_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_sqrt_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a) #12 + // APPLE-NEXT: ret <8 x double> %0 + // 
X64-LABEL: test_mm512_sqrt_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_sqrt_pd(a); } __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sqrt_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> %__A) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_sqrt_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> %__A) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_sqrt_pd (__W,__U,__A); } __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_maskz_sqrt_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> %__A) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_sqrt_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> %__A) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_sqrt_pd (__U,__A); } __m512d test_mm512_mask_sqrt_round_pd(__m512d __W,__mmask8 __U,__m512d __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_round_pd - // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 8) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sqrt_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %__A, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_sqrt_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %__A, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_sqrt_round_pd(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_sqrt_round_pd(__mmask8 __U,__m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_round_pd - // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 8) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: 
test_mm512_maskz_sqrt_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %__A, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_sqrt_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %__A, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_sqrt_round_pd(__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_sqrt_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_sqrt_round_pd - // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 8) + // APPLE-LABEL: test_mm512_sqrt_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %__A, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_sqrt_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %__A, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_sqrt_round_pd(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_sqrt_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_sqrt_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_sqrt_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_sqrt_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_sqrt_ps(a); } __m512 test_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sqrt_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> %__A) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_sqrt_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> %__A) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_sqrt_ps( __W, __U, __A); } __m512 test_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} + // APPLE-LABEL: test_mm512_maskz_sqrt_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> %__A) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: 
test_mm512_maskz_sqrt_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> %__A) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_sqrt_ps(__U ,__A); } __m512 test_mm512_mask_sqrt_round_ps(__m512 __W,__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_round_ps - // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 8) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sqrt_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %__A, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_sqrt_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %__A, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_sqrt_round_ps(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_sqrt_round_ps(__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_round_ps - // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 8) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} + // APPLE-LABEL: test_mm512_maskz_sqrt_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %__A, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_sqrt_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %__A, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_sqrt_round_ps(__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_sqrt_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_sqrt_round_ps - // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 8) + // APPLE-LABEL: test_mm512_sqrt_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %__A, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_sqrt_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %__A, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_sqrt_round_ps(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_rsqrt14_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.512 + // APPLE-LABEL: test_mm512_rsqrt14_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rsqrt14.pd.512(<8 x double> %a, <8 x double> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: 
test_mm512_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rsqrt14.pd.512(<8 x double> %a, <8 x double> zeroinitializer, i8 -1) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_rsqrt14_pd(a); } __m512d test_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.512 + // APPLE-LABEL: test_mm512_mask_rsqrt14_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rsqrt14.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rsqrt14.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_rsqrt14_pd (__W,__U,__A); } __m512d test_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.512 + // APPLE-LABEL: test_mm512_maskz_rsqrt14_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rsqrt14.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rsqrt14.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_rsqrt14_pd (__U,__A); } __m512 test_mm512_rsqrt14_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.512 + // APPLE-LABEL: test_mm512_rsqrt14_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a, <16 x float> zeroinitializer, i16 -1) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a, <16 x float> zeroinitializer, i16 -1) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_rsqrt14_ps(a); } __m512 test_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.512 + // APPLE-LABEL: test_mm512_mask_rsqrt14_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_rsqrt14_ps (__W,__U,__A); } __m512 test_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.512 + // APPLE-LABEL: test_mm512_maskz_rsqrt14_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_rsqrt14_ps (__U,__A); } __m512 
test_mm512_add_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_add_ps - // CHECK: fadd <16 x float> + // APPLE-LABEL: test_mm512_add_ps + // APPLE: entry: + // APPLE-NEXT: %add.i = fadd <16 x float> %a, %b + // APPLE-NEXT: ret <16 x float> %add.i + // X64-LABEL: test_mm512_add_ps + // X64: entry: + // X64-NEXT: %add.i = fadd <16 x float> %a, %b + // X64-NEXT: ret <16 x float> %add.i return _mm512_add_ps(a, b); } __m512d test_mm512_add_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_add_pd - // CHECK: fadd <8 x double> + // APPLE-LABEL: test_mm512_add_pd + // APPLE: entry: + // APPLE-NEXT: %add.i = fadd <8 x double> %a, %b + // APPLE-NEXT: ret <8 x double> %add.i + // X64-LABEL: test_mm512_add_pd + // X64: entry: + // X64-NEXT: %add.i = fadd <8 x double> %a, %b + // X64-NEXT: ret <8 x double> %add.i return _mm512_add_pd(a, b); } __m512 test_mm512_mul_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mul_ps - // CHECK: fmul <16 x float> + // APPLE-LABEL: test_mm512_mul_ps + // APPLE: entry: + // APPLE-NEXT: %mul.i = fmul <16 x float> %a, %b + // APPLE-NEXT: ret <16 x float> %mul.i + // X64-LABEL: test_mm512_mul_ps + // X64: entry: + // X64-NEXT: %mul.i = fmul <16 x float> %a, %b + // X64-NEXT: ret <16 x float> %mul.i return _mm512_mul_ps(a, b); } __m512d test_mm512_mul_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mul_pd - // CHECK: fmul <8 x double> + // APPLE-LABEL: test_mm512_mul_pd + // APPLE: entry: + // APPLE-NEXT: %mul.i = fmul <8 x double> %a, %b + // APPLE-NEXT: ret <8 x double> %mul.i + // X64-LABEL: test_mm512_mul_pd + // X64: entry: + // X64-NEXT: %mul.i = fmul <8 x double> %a, %b + // X64-NEXT: ret <8 x double> %mul.i return _mm512_mul_pd(a, b); } void test_mm512_storeu_si512 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_storeu_si512 - // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 1{{$}} - // CHECK-NEXT: ret void + // APPLE-LABEL: test_mm512_storeu_si512 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: store <8 x i64> %__A, <8 x i64>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_storeu_si512 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: store <8 x i64> %__A, <8 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void _mm512_storeu_si512 ( __P,__A); } void test_mm512_storeu_ps(void *p, __m512 a) { - // CHECK-LABEL: @test_mm512_storeu_ps - // CHECK: store <16 x float> %{{.*}}, <16 x float>* %{{.*}}, align 1{{$}} - // CHECK-NEXT: ret void + // APPLE-LABEL: test_mm512_storeu_ps + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %p to <16 x float>* + // APPLE-NEXT: store <16 x float> %a, <16 x float>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_storeu_ps + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %p to <16 x float>* + // X64-NEXT: store <16 x float> %a, <16 x float>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void _mm512_storeu_ps(p, a); } void test_mm512_storeu_pd(void *p, __m512d a) { - // CHECK-LABEL: @test_mm512_storeu_pd - // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 1{{$}} - // CHECK-NEXT: ret void + // APPLE-LABEL: test_mm512_storeu_pd + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %p to <8 x double>* + // APPLE-NEXT: store <8 x double> %a, <8 x double>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_storeu_pd + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %p to <8 x double>* + // 
X64-NEXT: store <8 x double> %a, <8 x double>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void _mm512_storeu_pd(p, a); } void test_mm512_mask_store_ps(void *p, __m512 a, __mmask16 m) { - // CHECK-LABEL: @test_mm512_mask_store_ps - // CHECK: @llvm.masked.store.v16f32.p0v16f32(<16 x float> %{{.*}}, <16 x float>* %{{.*}}, i32 64, <16 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_store_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %p to <16 x float>* + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %a, <16 x float>* %0, i32 64, <16 x i1> %1) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_store_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %p to <16 x float>* + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %a, <16 x float>* %0, i32 64, <16 x i1> %1) #12 + // X64-NEXT: ret void _mm512_mask_store_ps(p, m, a); } void test_mm512_store_si512 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_store_si512 - // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64 - // CHECK: [[SI512_3:%.+]] = load i8*, i8** %__P.addr.i, align 8 - // CHECK: bitcast i8* [[SI512_3]] to <8 x i64>* - // CHECK: store <8 x i64> + // APPLE-LABEL: test_mm512_store_si512 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: store <8 x i64> %__A, <8 x i64>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_store_si512 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: store <8 x i64> %__A, <8 x i64>* %0, align 64, !tbaa !2 + // X64-NEXT: ret void _mm512_store_si512 ( __P,__A); } void test_mm512_store_epi32 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_store_epi32 - // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64 - // CHECK: [[Si32_3:%.+]] = load i8*, i8** %__P.addr.i, align 8 - // CHECK: bitcast i8* [[Si32_3]] to <8 x i64>* - // CHECK: store <8 x i64> + // APPLE-LABEL: test_mm512_store_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: store <8 x i64> %__A, <8 x i64>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_store_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: store <8 x i64> %__A, <8 x i64>* %0, align 64, !tbaa !2 + // X64-NEXT: ret void _mm512_store_epi32 ( __P,__A); } void test_mm512_store_epi64 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_store_epi64 - // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64 - // CHECK: [[SI64_3:%.+]] = load i8*, i8** %__P.addr.i, align 8 - // CHECK: bitcast i8* [[SI64_3]] to <8 x i64>* - // CHECK: store <8 x i64> + // APPLE-LABEL: test_mm512_store_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: store <8 x i64> %__A, <8 x i64>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_store_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: store <8 x i64> %__A, <8 x i64>* %0, align 64, !tbaa !2 + // X64-NEXT: ret void _mm512_store_epi64 ( __P,__A); } void test_mm512_store_ps(void *p, __m512 a) { - // CHECK-LABEL: @test_mm512_store_ps - // CHECK: store <16 x float> + // APPLE-LABEL: test_mm512_store_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %p to <16 x float>* + // APPLE-NEXT: store <16 x float> %a, <16 x float>* %0, align 64, !tbaa 
!2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_store_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %p to <16 x float>* + // X64-NEXT: store <16 x float> %a, <16 x float>* %0, align 64, !tbaa !2 + // X64-NEXT: ret void _mm512_store_ps(p, a); } void test_mm512_store_pd(void *p, __m512d a) { - // CHECK-LABEL: @test_mm512_store_pd - // CHECK: store <8 x double> + // APPLE-LABEL: test_mm512_store_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %p to <8 x double>* + // APPLE-NEXT: store <8 x double> %a, <8 x double>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_store_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %p to <8 x double>* + // X64-NEXT: store <8 x double> %a, <8 x double>* %0, align 64, !tbaa !2 + // X64-NEXT: ret void _mm512_store_pd(p, a); } void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m) { - // CHECK-LABEL: @test_mm512_mask_store_pd - // CHECK: @llvm.masked.store.v8f64.p0v8f64(<8 x double> %{{.*}}, <8 x double>* %{{.*}}, i32 64, <8 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_store_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %p to <8 x double>* + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: tail call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %a, <8 x double>* %0, i32 64, <8 x i1> %1) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_store_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %p to <8 x double>* + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %a, <8 x double>* %0, i32 64, <8 x i1> %1) #12 + // X64-NEXT: ret void _mm512_mask_store_pd(p, m, a); } void test_mm512_storeu_epi32(void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_storeu_epi32 - // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 1{{$}} + // APPLE-LABEL: test_mm512_storeu_epi32 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: store <8 x i64> %__A, <8 x i64>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_storeu_epi32 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: store <8 x i64> %__A, <8 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void return _mm512_storeu_epi32(__P, __A); } void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_storeu_epi32 - // CHECK: @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_storeu_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast i8* %__P to <16 x i32>* + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %0, <16 x i32>* %1, i32 1, <16 x i1> %2) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_storeu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to <16 x i32>* + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %0, <16 x i32>* %1, i32 1, <16 x i1> %2) #12 + // X64-NEXT: ret void return _mm512_mask_storeu_epi32(__P, __U, __A); } void test_mm512_storeu_epi64(void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_storeu_epi64 - // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 1{{$}} + // APPLE-LABEL: 
test_mm512_storeu_epi64 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: store <8 x i64> %__A, <8 x i64>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_storeu_epi64 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: store <8 x i64> %__A, <8 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void return _mm512_storeu_epi64(__P, __A); } void test_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_storeu_epi64 - // CHECK: @llvm.masked.store.v8i64.p0v8i64(<8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, i32 1, <8 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_storeu_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: tail call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> %__A, <8 x i64>* %0, i32 1, <8 x i1> %1) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_storeu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> %__A, <8 x i64>* %0, i32 1, <8 x i1> %1) #12 + // X64-NEXT: ret void return _mm512_mask_storeu_epi64(__P, __U, __A); } __m512i test_mm512_loadu_si512 (void *__P) { - // CHECK-LABEL: @test_mm512_loadu_si512 - // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 1{{$}} + // APPLE-LABEL: test_mm512_loadu_si512 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %0 = load <8 x i64>, <8 x i64>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_loadu_si512 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %0 = load <8 x i64>, <8 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <8 x i64> %0 return _mm512_loadu_si512 ( __P); } __m512i test_mm512_loadu_epi32 (void *__P) { - // CHECK-LABEL: @test_mm512_loadu_epi32 - // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 1{{$}} + // APPLE-LABEL: test_mm512_loadu_epi32 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %0 = load <8 x i64>, <8 x i64>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_loadu_epi32 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %0 = load <8 x i64>, <8 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <8 x i64> %0 return _mm512_loadu_epi32 (__P); } __m512i test_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_epi32 - // CHECK: @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_loadu_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = bitcast i8* %__P to <16 x i32>* + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %1, i32 1, <16 x i1> %2, <16 x i32> %0) #12 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_loadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to <16 x i32>* + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = tail call <16 x i32> 
@llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %1, i32 1, <16 x i1> %2, <16 x i32> %0) #12 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_loadu_epi32 (__W,__U, __P); } __m512i test_mm512_maskz_loadu_epi32 (__mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_loadu_epi32 - // CHECK: @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_loadu_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %0, i32 1, <16 x i1> %1, <16 x i32> zeroinitializer) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_loadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %0, i32 1, <16 x i1> %1, <16 x i32> zeroinitializer) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_loadu_epi32 (__U, __P); } __m512i test_mm512_loadu_epi64 (void *__P) { - // CHECK-LABEL: @test_mm512_loadu_epi64 - // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 1{{$}} + // APPLE-LABEL: test_mm512_loadu_epi64 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %0 = load <8 x i64>, <8 x i64>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_loadu_epi64 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %0 = load <8 x i64>, <8 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <8 x i64> %0 return _mm512_loadu_epi64 (__P); } __m512i test_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_epi64 - // CHECK: @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_loadu_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 1, <8 x i1> %1, <8 x i64> %__W) #12 + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_loadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 1, <8 x i1> %1, <8 x i64> %__W) #12 + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_loadu_epi64 (__W,__U, __P); } __m512i test_mm512_maskz_loadu_epi64 (__mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_loadu_epi64 - // CHECK: @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_loadu_epi64 + // APPLE: entry: + // APPLE-NEXT: %conv = trunc i16 %__U to i8 + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = bitcast i8 %conv to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 1, <8 x i1> %1, <8 x i64> zeroinitializer) #12 + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_loadu_epi64 + // X64: entry: + // X64-NEXT: %conv 
= trunc i16 %__U to i8 + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = bitcast i8 %conv to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 1, <8 x i1> %1, <8 x i64> zeroinitializer) #12 + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_loadu_epi64 (__U, __P); } __m512 test_mm512_loadu_ps(void *p) { - // CHECK-LABEL: @test_mm512_loadu_ps - // CHECK: load <16 x float>, <16 x float>* {{.*}}, align 1{{$}} + // APPLE-LABEL: test_mm512_loadu_ps + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %p to <16 x float>* + // APPLE-NEXT: %0 = load <16 x float>, <16 x float>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_loadu_ps + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %p to <16 x float>* + // X64-NEXT: %0 = load <16 x float>, <16 x float>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <16 x float> %0 return _mm512_loadu_ps(p); } __m512 test_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_ps - // CHECK: @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_loadu_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <16 x float>* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 1, <16 x i1> %1, <16 x float> %__W) #12 + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_loadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <16 x float>* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 1, <16 x i1> %1, <16 x float> %__W) #12 + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_loadu_ps (__W,__U, __P); } __m512d test_mm512_loadu_pd(void *p) { - // CHECK-LABEL: @test_mm512_loadu_pd - // CHECK: load <8 x double>, <8 x double>* {{.*}}, align 1{{$}} + // APPLE-LABEL: test_mm512_loadu_pd + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast i8* %p to <8 x double>* + // APPLE-NEXT: %0 = load <8 x double>, <8 x double>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_loadu_pd + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %p to <8 x double>* + // X64-NEXT: %0 = load <8 x double>, <8 x double>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <8 x double> %0 return _mm512_loadu_pd(p); } __m512d test_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_pd - // CHECK: @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_loadu_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x double>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 1, <8 x i1> %1, <8 x double> %__W) #12 + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_loadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 1, <8 x i1> %1, <8 x double> %__W) #12 + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_loadu_pd (__W,__U, __P); } __m512i test_mm512_load_si512 (void *__P) 
{ - // CHECK-LABEL: @test_mm512_load_si512 - // CHECK: [[LI512_1:%.+]] = load i8*, i8** %__P.addr.i, align 8 - // CHECK: [[LI512_2:%.+]] = bitcast i8* [[LI512_1]] to <8 x i64>* - // CHECK: load <8 x i64>, <8 x i64>* [[LI512_2]], align 64 + // APPLE-LABEL: test_mm512_load_si512 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_load_si512 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2 + // X64-NEXT: ret <8 x i64> %1 return _mm512_load_si512 ( __P); } __m512i test_mm512_load_epi32 (void *__P) { - // CHECK-LABEL: @test_mm512_load_epi32 - // CHECK: [[LI32_1:%.+]] = load i8*, i8** %__P.addr.i, align 8 - // CHECK: [[LI32_2:%.+]] = bitcast i8* [[LI32_1]] to <8 x i64>* - // CHECK: load <8 x i64>, <8 x i64>* [[LI32_2]], align 64 + // APPLE-LABEL: test_mm512_load_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2 + // X64-NEXT: ret <8 x i64> %1 return _mm512_load_epi32 ( __P); } __m512i test_mm512_load_epi64 (void *__P) { - // CHECK-LABEL: @test_mm512_load_epi64 - // CHECK: [[LI64_1:%.+]] = load i8*, i8** %__P.addr.i, align 8 - // CHECK: [[LI64_2:%.+]] = bitcast i8* [[LI64_1]] to <8 x i64>* - // CHECK: load <8 x i64>, <8 x i64>* [[LI64_2]], align 64 + // APPLE-LABEL: test_mm512_load_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2 + // X64-NEXT: ret <8 x i64> %1 return _mm512_load_epi64 ( __P); } __m512 test_mm512_load_ps(void *p) { - // CHECK-LABEL: @test_mm512_load_ps - // CHECK: load <16 x float>, <16 x float>* %{{.*}}, align 64 + // APPLE-LABEL: test_mm512_load_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %p to <16 x float>* + // APPLE-NEXT: %1 = load <16 x float>, <16 x float>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_load_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %p to <16 x float>* + // X64-NEXT: %1 = load <16 x float>, <16 x float>* %0, align 64, !tbaa !2 + // X64-NEXT: ret <16 x float> %1 return _mm512_load_ps(p); } __m512 test_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_load_ps - // CHECK: @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_load_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <16 x float>* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 64, <16 x i1> %1, <16 x float> %__W) #12 + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_load_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <16 x float>* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 
x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 64, <16 x i1> %1, <16 x float> %__W) #12 + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_load_ps (__W,__U, __P); } __m512 test_mm512_maskz_load_ps(__mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_ps - // CHECK: @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_load_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <16 x float>* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 64, <16 x i1> %1, <16 x float> zeroinitializer) #12 + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_load_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <16 x float>* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 64, <16 x i1> %1, <16 x float> zeroinitializer) #12 + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_load_ps(__U, __P); } __m512d test_mm512_load_pd(void *p) { - // CHECK-LABEL: @test_mm512_load_pd - // CHECK: load <8 x double>, <8 x double>* %{{.*}}, align 64 + // APPLE-LABEL: test_mm512_load_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %p to <8 x double>* + // APPLE-NEXT: %1 = load <8 x double>, <8 x double>* %0, align 64, !tbaa !2 + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_load_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %p to <8 x double>* + // X64-NEXT: %1 = load <8 x double>, <8 x double>* %0, align 64, !tbaa !2 + // X64-NEXT: ret <8 x double> %1 return _mm512_load_pd(p); } __m512d test_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_load_pd - // CHECK: @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_load_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x double>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 64, <8 x i1> %1, <8 x double> %__W) #12 + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_load_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 64, <8 x i1> %1, <8 x double> %__W) #12 + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_load_pd (__W,__U, __P); } __m512d test_mm512_maskz_load_pd(__mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_pd - // CHECK: @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_load_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x double>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 64, <8 x i1> %1, <8 x double> zeroinitializer) #12 + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_load_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 64, <8 x 
i1> %1, <8 x double> zeroinitializer) #12 + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_load_pd(__U, __P); } __m512d test_mm512_set1_pd(double d) { - // CHECK-LABEL: @test_mm512_set1_pd - // CHECK: insertelement <8 x double> {{.*}}, i32 0 - // CHECK: insertelement <8 x double> {{.*}}, i32 1 - // CHECK: insertelement <8 x double> {{.*}}, i32 2 - // CHECK: insertelement <8 x double> {{.*}}, i32 3 - // CHECK: insertelement <8 x double> {{.*}}, i32 4 - // CHECK: insertelement <8 x double> {{.*}}, i32 5 - // CHECK: insertelement <8 x double> {{.*}}, i32 6 - // CHECK: insertelement <8 x double> {{.*}}, i32 7 + // APPLE-LABEL: test_mm512_set1_pd + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x double> undef, double %d, i32 0 + // APPLE-NEXT: %vecinit7.i = shufflevector <8 x double> %vecinit.i, <8 x double> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: ret <8 x double> %vecinit7.i + // X64-LABEL: test_mm512_set1_pd + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x double> undef, double %d, i32 0 + // X64-NEXT: %vecinit7.i = shufflevector <8 x double> %vecinit.i, <8 x double> undef, <8 x i32> zeroinitializer + // X64-NEXT: ret <8 x double> %vecinit7.i return _mm512_set1_pd(d); } __mmask16 test_mm512_knot(__mmask16 a) { - // CHECK-LABEL: @test_mm512_knot - // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], - // CHECK: bitcast <16 x i1> [[NOT]] to i16 + // APPLE-LABEL: test_mm512_knot + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %a to <16 x i1> + // APPLE-NEXT: %1 = xor <16 x i1> %0, + // APPLE-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // APPLE-NEXT: ret i16 %2 + // X64-LABEL: test_mm512_knot + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %a to <16 x i1> + // X64-NEXT: %1 = xor <16 x i1> %0, + // X64-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // X64-NEXT: ret i16 %2 return _mm512_knot(a); } __m512i test_mm512_alignr_epi32(__m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_alignr_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_alignr_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %b to <16 x i32> + // APPLE-NEXT: %valign = shufflevector <16 x i32> %1, <16 x i32> %0, <16 x i32> + // APPLE-NEXT: %2 = bitcast <16 x i32> %valign to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %b to <16 x i32> + // X64-NEXT: %valign = shufflevector <16 x i32> %1, <16 x i32> %0, <16 x i32> + // X64-NEXT: %2 = bitcast <16 x i32> %valign to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_alignr_epi32(a, b, 2); } __m512i test_mm512_mask_alignr_epi32(__m512i w, __mmask16 u, __m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_mask_alignr_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_mask_alignr_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %b to <16 x i32> + // APPLE-NEXT: %valign = shufflevector <16 x i32> %1, <16 x i32> %0, <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %w to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %u to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %valign, <16 x i32> 
%2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %b to <16 x i32> + // X64-NEXT: %valign = shufflevector <16 x i32> %1, <16 x i32> %0, <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %w to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %u to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %valign, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_alignr_epi32(w, u, a, b, 2); } __m512i test_mm512_maskz_alignr_epi32( __mmask16 u, __m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_maskz_alignr_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_maskz_alignr_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %b to <16 x i32> + // APPLE-NEXT: %valign = shufflevector <16 x i32> %1, <16 x i32> %0, <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %u to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %valign, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %b to <16 x i32> + // X64-NEXT: %valign = shufflevector <16 x i32> %1, <16 x i32> %0, <16 x i32> + // X64-NEXT: %2 = bitcast i16 %u to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %valign, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_alignr_epi32(u, a, b, 2); } __m512i test_mm512_alignr_epi64(__m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_alignr_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_alignr_epi64 + // APPLE: entry: + // APPLE-NEXT: %valign = shufflevector <8 x i64> %b, <8 x i64> %a, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %valign + // X64-LABEL: test_mm512_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <8 x i64> %b, <8 x i64> %a, <8 x i32> + // X64-NEXT: ret <8 x i64> %valign return _mm512_alignr_epi64(a, b, 2); } __m512i test_mm512_mask_alignr_epi64(__m512i w, __mmask8 u, __m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_mask_alignr_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> {{.*}} + // APPLE-LABEL: test_mm512_mask_alignr_epi64 + // APPLE: entry: + // APPLE-NEXT: %valign = shufflevector <8 x i64> %b, <8 x i64> %a, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %u to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %valign, <8 x i64> %w + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <8 x i64> %b, <8 x i64> %a, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %u to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %valign, <8 x i64> %w + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_alignr_epi64(w, u, a, b, 2); } __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b) { - // CHECK-LABEL: 
@test_mm512_maskz_alignr_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> {{.*}} + // APPLE-LABEL: test_mm512_maskz_alignr_epi64 + // APPLE: entry: + // APPLE-NEXT: %valign = shufflevector <8 x i64> %b, <8 x i64> %a, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %u to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %valign, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <8 x i64> %b, <8 x i64> %a, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %u to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %valign, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_alignr_epi64(u, a, b, 2); } __m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmadd_round_pd - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 + // APPLE-LABEL: test_mm512_fmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fmadd_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_fmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmadd_round_pd - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fmadd_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_round_pd - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask3_fmadd_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x 
double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // X64-NEXT: ret <8 x double> %2 return _mm512_mask3_fmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_round_pd - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fmadd_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 + // APPLE-LABEL: test_mm512_fmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fmsub_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_fmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fmsub_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return 
_mm512_mask_fmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fmsub_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmadd_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 + // APPLE-LABEL: test_mm512_fnmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__A + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fnmadd_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__A + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_fnmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__A + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask3_fnmadd_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__A + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // X64-NEXT: ret <8 x double> %2 return _mm512_mask3_fnmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d 
test_mm512_maskz_fnmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__A + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fnmadd_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__A + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 + // APPLE-LABEL: test_mm512_fnmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__A + // APPLE-NEXT: %sub1 = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fnmsub_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__A + // X64-NEXT: %sub1 = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_fnmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fnmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__A + // APPLE-NEXT: %sub1 = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fnmsub_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__A + // X64-NEXT: %sub1 = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x 
double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_fmadd_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_fmadd_pd(__A, __B, __C); } __m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmadd_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fmadd_pd(__A, __U, __B, __C); } __m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmadd_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask3_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // X64-NEXT: ret <8 x double> %2 return _mm512_mask3_fmadd_pd(__A, __B, __C, __U); } __m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmadd_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x 
double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fmadd_pd(__U, __A, __B, __C); } __m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_fmsub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_fmsub_pd(__A, __B, __C); } __m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fmsub_pd(__A, __U, __B, __C); } __m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x 
double> %2 + // X64-LABEL: test_mm512_maskz_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fmsub_pd(__U, __A, __B, __C); } __m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmadd_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_fnmadd_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__B + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %__C) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__B + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %__C) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_fnmadd_pd(__A, __B, __C); } __m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmadd_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__A + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask3_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__A + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // X64-NEXT: ret <8 x double> %2 return _mm512_mask3_fnmadd_pd(__A, __B, __C, __U); } __m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmadd_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__A + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__A + // X64-NEXT: %0 = tail call <8 x double> 
@llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C); } __m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_fnmsub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__B + // APPLE-NEXT: %sub1.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub1.i) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__B + // X64-NEXT: %sub1.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub1.i) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_fnmsub_pd(__A, __B, __C); } __m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmsub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__A + // APPLE-NEXT: %sub1.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %sub1.i) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__A + // X64-NEXT: %sub1.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %sub1.i) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C); } __m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmadd_round_ps - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 + // APPLE-LABEL: test_mm512_fmadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fmadd_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_fmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: 
@test_mm512_mask_fmadd_round_ps - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fmadd_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_round_ps - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask3_fmadd_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // X64-NEXT: ret <16 x float> %2 return _mm512_mask3_fmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_round_ps - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fmadd_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: 
@llvm.x86.avx512.vfmadd.ps.512 + // APPLE-LABEL: test_mm512_fmsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fmsub_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_fmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fmsub_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fmsub_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 + // APPLE-LABEL: test_mm512_fnmadd_round_ps + // 
APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__B + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %__C, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fnmadd_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__B + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %__C, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_fnmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__A + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask3_fnmadd_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__A + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // X64-NEXT: ret <16 x float> %2 return _mm512_mask3_fnmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fnmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__A + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fnmadd_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__A + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 + // APPLE-LABEL: test_mm512_fnmsub_round_ps + // APPLE: entry: + // 
APPLE-NEXT: %sub = fsub <16 x float> , %__B + // APPLE-NEXT: %sub1 = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fnmsub_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__B + // X64-NEXT: %sub1 = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_fnmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fnmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__A + // APPLE-NEXT: %sub1 = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %sub1, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fnmsub_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__A + // X64-NEXT: %sub1 = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %sub1, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_fmadd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_fmadd_ps(__A, __B, __C); } __m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_fmadd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> 
@llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fmadd_ps(__A, __U, __B, __C); } __m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmadd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask3_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // X64-NEXT: ret <16 x float> %2 return _mm512_mask3_fmadd_ps(__A, __B, __C, __U); } __m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmadd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fmadd_ps(__U, __A, __B, __C); } __m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_fmsub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_fmsub_ps(__A, __B, __C); } __m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // 
CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fmsub_ps(__A, __U, __B, __C); } __m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fmsub_ps(__U, __A, __B, __C); } __m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmadd_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_fnmadd_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__B + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %__C) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__B + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %__C) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_fnmadd_ps(__A, __B, __C); } __m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select 
<16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmadd_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__A + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask3_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__A + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // X64-NEXT: ret <16 x float> %2 return _mm512_mask3_fnmadd_ps(__A, __B, __C, __U); } __m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmadd_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__A + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__A + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C); } __m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_fnmsub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__B + // APPLE-NEXT: %sub1.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %sub1.i) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__B + // X64-NEXT: %sub1.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %sub1.i) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_fnmsub_ps(__A, __B, __C); } __m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to 
<16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fnmsub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__A + // APPLE-NEXT: %sub1.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %sub1.i) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__A + // X64-NEXT: %sub1.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %sub1.i) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C); } __m512d test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_round_pd - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 + // APPLE-LABEL: test_mm512_fmaddsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fmaddsub_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_fmaddsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmaddsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_pd - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmaddsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fmaddsub_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fmaddsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_pd - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmaddsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // 
APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask3_fmaddsub_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + // X64-NEXT: ret <8 x double> %2 return _mm512_mask3_fmaddsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmaddsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_pd - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmaddsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fmaddsub_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_round_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 + // APPLE-LABEL: test_mm512_fmsubadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fmsubadd_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_fmsubadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmsubadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsubadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: 
test_mm512_mask_fmsubadd_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fmsubadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsubadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_fmsubadd_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <8 x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> + // APPLE-LABEL: test_mm512_fmaddsub_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = fsub <8 x double> , %__C + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // APPLE-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = fsub <8 x double> , %__C + // X64-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // X64-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // X64-NEXT: ret <8 x double> %3 return _mm512_fmaddsub_pd(__A, __B, __C); } __m512d test_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x 
double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <8 x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmaddsub_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = fsub <8 x double> , %__C + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // APPLE-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %5 + // X64-LABEL: test_mm512_mask_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = fsub <8 x double> , %__C + // X64-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // X64-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__A + // X64-NEXT: ret <8 x double> %5 return _mm512_mask_fmaddsub_pd(__A, __U, __B, __C); } __m512d test_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <8 x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmaddsub_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = fsub <8 x double> , %__C + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // APPLE-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %5 + // X64-LABEL: test_mm512_mask3_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = fsub <8 x double> , %__C + // X64-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // X64-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__C + // X64-NEXT: ret <8 x double> %5 return _mm512_mask3_fmaddsub_pd(__A, __B, __C, __U); } __m512d 
test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <8 x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmaddsub_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = fsub <8 x double> , %__C + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // APPLE-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %5 + // X64-LABEL: test_mm512_maskz_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %1 = fsub <8 x double> , %__C + // X64-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #12 + // X64-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %5 return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C); } __m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <8 x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> + // APPLE-LABEL: test_mm512_fmsubadd_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // X64-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + // X64-NEXT: ret <8 x double> %2 return _mm512_fmsubadd_pd(__A, __B, __C); } __m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <8 
x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsubadd_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + // APPLE-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %4 + // X64-LABEL: test_mm512_mask_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // X64-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__A + // X64-NEXT: ret <8 x double> %4 return _mm512_mask_fmsubadd_pd(__A, __U, __B, __C); } __m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <8 x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsubadd_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + // APPLE-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %4 + // X64-LABEL: test_mm512_maskz_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #12 + // X64-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // X64-NEXT: %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> 
zeroinitializer + // X64-NEXT: ret <8 x double> %4 return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C); } __m512 test_mm512_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_round_ps - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 + // APPLE-LABEL: test_mm512_fmaddsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fmaddsub_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_fmaddsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmaddsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_ps - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmaddsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fmaddsub_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fmaddsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_ps - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmaddsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask3_fmaddsub_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + // X64-NEXT: ret <16 x float> %2 return _mm512_mask3_fmaddsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmaddsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_ps - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // 
APPLE-LABEL: test_mm512_maskz_fmaddsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fmaddsub_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 + // APPLE-LABEL: test_mm512_fmsubadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_fmsubadd_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_fmsubadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmsubadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsubadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fmsubadd_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fmsubadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsubadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x 
float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_fmsubadd_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> + // APPLE-LABEL: test_mm512_fmaddsub_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = fsub <16 x float> , %__C + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #12 + // APPLE-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = fsub <16 x float> , %__C + // X64-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #12 + // X64-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // X64-NEXT: ret <16 x float> %3 return _mm512_fmaddsub_ps(__A, __B, __C); } __m512 test_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmaddsub_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = fsub <16 x float> , %__C + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #12 + // APPLE-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__A + 
// APPLE-NEXT: ret <16 x float> %5 + // X64-LABEL: test_mm512_mask_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = fsub <16 x float> , %__C + // X64-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #12 + // X64-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__A + // X64-NEXT: ret <16 x float> %5 return _mm512_mask_fmaddsub_ps(__A, __U, __B, __C); } __m512 test_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmaddsub_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = fsub <16 x float> , %__C + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #12 + // APPLE-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %5 + // X64-LABEL: test_mm512_mask3_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = fsub <16 x float> , %__C + // X64-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #12 + // X64-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__C + // X64-NEXT: ret <16 x float> %5 return _mm512_mask3_fmaddsub_ps(__A, __B, __C, __U); } __m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmaddsub_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = fsub <16 x float> , %__C + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x 
float> %__B, <16 x float> %1) #12 + // APPLE-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %5 + // X64-LABEL: test_mm512_maskz_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %1 = fsub <16 x float> , %__C + // X64-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #12 + // X64-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %5 return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C); } __m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> + // APPLE-LABEL: test_mm512_fmsubadd_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // X64-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + // X64-NEXT: ret <16 x float> %2 return _mm512_fmsubadd_ps(__A, __B, __C); } __m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fmsubadd_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + // APPLE-NEXT: %3 = bitcast 
i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %4 + // X64-LABEL: test_mm512_mask_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // X64-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__A + // X64-NEXT: ret <16 x float> %4 return _mm512_mask_fmsubadd_ps(__A, __U, __B, __C); } __m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_fmsubadd_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %4 + // X64-LABEL: test_mm512_maskz_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #12 + // X64-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %4 return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C); } __m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <8 x double> , %__C + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_mask3_fmsub_round_pd + // X64: entry: 
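+ // The trailing select in each masked check encodes the passthrough convention:
+ // _mask_ variants blend the result with the first source (__A), _mask3_ variants
+ // blend with the addend (__C), and _maskz_ variants blend with zeroinitializer.
+ // fmsub itself is checked as a vfmadd/fma call whose __C operand is first negated
+ // by the preceding fsub.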
+ // X64-NEXT: %0 = fsub <8 x double> , %__C + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %0, i32 8) + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // X64-NEXT: ret <8 x double> %3 return _mm512_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsub_pd + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <8 x double> , %__C + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %0) #12 + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_mask3_fmsub_pd + // X64: entry: + // X64-NEXT: %0 = fsub <8 x double> , %__C + // X64-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %0) #12 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // X64-NEXT: ret <8 x double> %3 return _mm512_mask3_fmsub_pd(__A, __B, __C, __U); } __m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <16 x float> , %__C + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask3_fmsub_round_ps + // X64: entry: + // X64-NEXT: %0 = fsub <16 x float> , %__C + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %0, i32 8) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // X64-NEXT: ret <16 x float> %3 return _mm512_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsub_ps + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <16 x float> , %__C + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x 
float> %__A, <16 x float> %__B, <16 x float> %0) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask3_fmsub_ps + // X64: entry: + // X64-NEXT: %0 = fsub <16 x float> , %__C + // X64-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %0) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // X64-NEXT: ret <16 x float> %3 return _mm512_mask3_fmsub_ps(__A, __B, __C, __U); } __m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsubadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <8 x double> , %__C + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_mask3_fmsubadd_round_pd + // X64: entry: + // X64-NEXT: %0 = fsub <8 x double> , %__C + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %0, i32 8) + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // X64-NEXT: ret <8 x double> %3 return _mm512_mask3_fmsubadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <8 x double> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsubadd_pd + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <8 x double> , %__C + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %0) #12 + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + // APPLE-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %1, <8 x i32> + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %5 + // X64-LABEL: test_mm512_mask3_fmsubadd_pd + // X64: entry: + // X64-NEXT: %0 = fsub <8 x double> , %__C + // X64-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %0) #12 + // X64-NEXT: %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #12 + 
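+ // fmsubadd computes a*b+c in the even lanes and a*b-c in the odd lanes (fmaddsub
+ // is the reverse), so the expected IR is two llvm.fma calls -- one on the negated
+ // __C -- interleaved by the shufflevector that follows and then blended by the
+ // mask select.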
// X64-NEXT: %3 = shufflevector <8 x double> %2, <8 x double> %1, <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__C + // X64-NEXT: ret <8 x double> %5 return _mm512_mask3_fmsubadd_pd(__A, __B, __C, __U); } __m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsubadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <16 x float> , %__C + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask3_fmsubadd_round_ps + // X64: entry: + // X64-NEXT: %0 = fsub <16 x float> , %__C + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %0, i32 8) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // X64-NEXT: ret <16 x float> %3 return _mm512_mask3_fmsubadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <16 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fmsubadd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = fsub <16 x float> , %__C + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %0) #12 + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // APPLE-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %1, <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %5 + // X64-LABEL: test_mm512_mask3_fmsubadd_ps + // X64: entry: + // X64-NEXT: %0 = fsub <16 x float> , %__C + // X64-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %0) #12 + // X64-NEXT: %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #12 + // X64-NEXT: %3 = shufflevector <16 x float> %2, <16 x float> %1, <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__C + // X64-NEXT: ret <16 x float> %5 return _mm512_mask3_fmsubadd_ps(__A, __B, __C, __U); } __m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, 
__mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_round_pd - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmadd_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__B + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fnmadd_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__B + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %__C, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmadd_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__B + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__B + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fnmadd_pd(__A, __U, __B, __C); } __m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmadd_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__B + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %__C, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fnmadd_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__B + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %__C, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + 
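+ // fnmadd is checked as an fma on a negated multiplicand: %sub is the negation of
+ // __B, so the call computes -(__A*__B)+__C before the mask select chooses between
+ // the result and __A.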
// X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmadd_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__B + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %__C) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__B + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %__C) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fnmadd_ps(__A, __U, __B, __C); } __m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__B + // APPLE-NEXT: %sub1 = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %sub1, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fnmsub_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__B + // X64-NEXT: %sub1 = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %sub1, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fnmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_pd - // CHECK: fsub <8 x double> - // CHECK: fsub <8 x double> - // CHECK: @llvm.x86.avx512.vfmadd.pd.512 - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmsub_round_pd + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <8 x double> , %__A + // APPLE-NEXT: %0 = fsub <8 x double> , %__C + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %0, 
i32 8) + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_mask3_fnmsub_round_pd + // X64: entry: + // X64-NEXT: %sub = fsub <8 x double> , %__A + // X64-NEXT: %0 = fsub <8 x double> , %__C + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %0, i32 8) + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // X64-NEXT: ret <8 x double> %3 return _mm512_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmsub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__B + // APPLE-NEXT: %sub1.i = fsub <8 x double> , %__C + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub1.i) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__B + // X64-NEXT: %sub1.i = fsub <8 x double> , %__C + // X64-NEXT: %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub1.i) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_fnmsub_pd(__A, __U, __B, __C); } __m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_pd - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: fsub <8 x double> , %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmsub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <8 x double> , %__A + // APPLE-NEXT: %0 = fsub <8 x double> , %__C + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %0) #12 + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_mask3_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x double> , %__A + // X64-NEXT: %0 = fsub <8 x double> , %__C + // X64-NEXT: %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %0) #12 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %__C + // X64-NEXT: ret <8 x double> %3 return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U); } __m512 
test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__B + // APPLE-NEXT: %sub1 = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fnmsub_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__B + // X64-NEXT: %sub1 = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fnmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: @llvm.x86.avx512.vfmadd.ps.512 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmsub_round_ps + // APPLE: entry: + // APPLE-NEXT: %sub = fsub <16 x float> , %__A + // APPLE-NEXT: %0 = fsub <16 x float> , %__C + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask3_fnmsub_round_ps + // X64: entry: + // X64-NEXT: %sub = fsub <16 x float> , %__A + // X64-NEXT: %0 = fsub <16 x float> , %__C + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %0, i32 8) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // X64-NEXT: ret <16 x float> %3 return _mm512_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_fnmsub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__B + // APPLE-NEXT: %sub1.i = fsub <16 x float> , %__C + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x 
float> %sub.i, <16 x float> %sub1.i) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__B + // X64-NEXT: %sub1.i = fsub <16 x float> , %__C + // X64-NEXT: %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %sub1.i) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_fnmsub_ps(__A, __U, __B, __C); } __m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_ps - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: fsub <16 x float> , %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask3_fnmsub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i = fsub <16 x float> , %__A + // APPLE-NEXT: %0 = fsub <16 x float> , %__C + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %0) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask3_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <16 x float> , %__A + // X64-NEXT: %0 = fsub <16 x float> , %__C + // X64-NEXT: %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %0) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__C + // X64-NEXT: ret <16 x float> %3 return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U); } __mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epi32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpeq_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpeq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpeq_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epi32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpeq_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 
%5 + // X64-LABEL: test_mm512_mask_cmpeq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpeq_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epi64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpeq_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpeq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpeq_epi64_mask(__u, __a, __b); } __mmask8 test_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epi64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpeq_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpeq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpeq_epi64_mask(__a, __b); } __mmask16 test_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpgt_epi32_mask - // CHECK: icmp sgt <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpgt_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpgt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpgt_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epi32_mask - // CHECK: icmp sgt <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpgt_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmpgt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: 
%2 = icmp sgt <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpgt_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epi64_mask - // CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpgt_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sgt <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpgt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sgt <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpgt_epi64_mask(__u, __a, __b); } __mmask8 test_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpgt_epi64_mask - // CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpgt_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sgt <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpgt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sgt <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpgt_epi64_mask(__a, __b); } __m512d test_mm512_unpackhi_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_unpackhi_pd - // CHECK: shufflevector <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_unpackhi_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_unpackhi_pd + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_unpackhi_pd(a, b); } __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_unpacklo_pd - // CHECK: shufflevector <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_unpacklo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_unpacklo_pd + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_unpacklo_pd(a, b); } __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_unpackhi_ps - // CHECK: shufflevector <16 x float> {{.*}} + // APPLE-LABEL: test_mm512_unpackhi_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_unpackhi_ps + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_unpackhi_ps(a, b); } __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_unpacklo_ps - // CHECK: shufflevector <16 x float> {{.*}} + // APPLE-LABEL: test_mm512_unpacklo_ps + 
// APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_unpacklo_ps + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_unpacklo_ps(a, b); } __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmp_round_ps_mask - // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_round_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_round_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask - // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_round_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_round_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask16 test_mm512_cmp_ps_mask_eq_oq(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmp_ps_mask_eq_oq - // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_eq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ); } __mmask16 test_mm512_cmp_ps_mask_lt_os(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_lt_os - // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_lt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_LT_OS); } __mmask16 test_mm512_cmp_ps_mask_le_os(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_le_os - // CHECK: fcmp ole <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_le_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to 
i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_LE_OS); } __mmask16 test_mm512_cmp_ps_mask_unord_q(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_unord_q - // CHECK: fcmp uno <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_unord_q + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q); } __mmask16 test_mm512_cmp_ps_mask_neq_uq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_uq - // CHECK: fcmp une <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_neq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ); } __mmask16 test_mm512_cmp_ps_mask_nlt_us(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_nlt_us - // CHECK: fcmp uge <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_nlt_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NLT_US); } __mmask16 test_mm512_cmp_ps_mask_nle_us(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_nle_us - // CHECK: fcmp ugt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_nle_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NLE_US); } __mmask16 test_mm512_cmp_ps_mask_ord_q(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_ord_q - // CHECK: fcmp ord <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_ord_q + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q); } __mmask16 test_mm512_cmp_ps_mask_eq_uq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_uq - // CHECK: fcmp ueq <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_eq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // 
X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ); } __mmask16 test_mm512_cmp_ps_mask_nge_us(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_nge_us - // CHECK: fcmp ult <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_nge_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NGE_US); } __mmask16 test_mm512_cmp_ps_mask_ngt_us(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_ngt_us - // CHECK: fcmp ule <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_ngt_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ule <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NGT_US); } __mmask16 test_mm512_cmp_ps_mask_false_oq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_false_oq - // CHECK: fcmp false <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_false_oq + // APPLE: entry: + // APPLE-NEXT: ret i16 0 + // X64-LABEL: test_mm512_cmp_ps_mask_false_oq + // X64: entry: + // X64-NEXT: ret i16 0 return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OQ); } __mmask16 test_mm512_cmp_ps_mask_neq_oq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_oq - // CHECK: fcmp one <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_neq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_OQ); } __mmask16 test_mm512_cmp_ps_mask_ge_os(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_ge_os - // CHECK: fcmp oge <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_ge_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_GE_OS); } __mmask16 test_mm512_cmp_ps_mask_gt_os(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_gt_os - // CHECK: fcmp ogt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_gt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); } __mmask16 test_mm512_cmp_ps_mask_true_uq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_true_uq - // CHECK: fcmp true <16 
x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_true_uq + // APPLE: entry: + // APPLE-NEXT: ret i16 -1 + // X64-LABEL: test_mm512_cmp_ps_mask_true_uq + // X64: entry: + // X64-NEXT: ret i16 -1 return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_UQ); } __mmask16 test_mm512_cmp_ps_mask_eq_os(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_os - // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_eq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OS); } __mmask16 test_mm512_cmp_ps_mask_lt_oq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_lt_oq - // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_lt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); } __mmask16 test_mm512_cmp_ps_mask_le_oq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_le_oq - // CHECK: fcmp ole <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_le_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ); } __mmask16 test_mm512_cmp_ps_mask_unord_s(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_unord_s - // CHECK: fcmp uno <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_unord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_UNORD_S); } __mmask16 test_mm512_cmp_ps_mask_neq_us(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_us - // CHECK: fcmp une <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_neq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_US); } __mmask16 test_mm512_cmp_ps_mask_nlt_uq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_nlt_uq - // CHECK: fcmp uge <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_nlt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: 
ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NLT_UQ); } __mmask16 test_mm512_cmp_ps_mask_nle_uq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_nle_uq - // CHECK: fcmp ugt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_nle_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NLE_UQ); } __mmask16 test_mm512_cmp_ps_mask_ord_s(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_ord_s - // CHECK: fcmp ord <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_ord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_ORD_S); } __mmask16 test_mm512_cmp_ps_mask_eq_us(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_us - // CHECK: fcmp ueq <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_eq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_EQ_US); } __mmask16 test_mm512_cmp_ps_mask_nge_uq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_nge_uq - // CHECK: fcmp ult <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_nge_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ); } __mmask16 test_mm512_cmp_ps_mask_ngt_uq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_ngt_uq - // CHECK: fcmp ule <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_ngt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ule <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NGT_UQ); } __mmask16 test_mm512_cmp_ps_mask_false_os(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_false_os - // CHECK: fcmp false <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_false_os + // APPLE: entry: + // APPLE-NEXT: ret i16 0 + // X64-LABEL: test_mm512_cmp_ps_mask_false_os + // X64: entry: + // X64-NEXT: ret i16 0 return 
_mm512_cmp_ps_mask(a, b, _CMP_FALSE_OS); } __mmask16 test_mm512_cmp_ps_mask_neq_os(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_os - // CHECK: fcmp one <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_neq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_OS); } __mmask16 test_mm512_cmp_ps_mask_ge_oq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_ge_oq - // CHECK: fcmp oge <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_ge_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_GE_OQ); } __mmask16 test_mm512_cmp_ps_mask_gt_oq(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_gt_oq - // CHECK: fcmp ogt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_gt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmp_ps_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmp_ps_mask(a, b, _CMP_GT_OQ); } __mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_cmp_ps_mask_true_us - // CHECK: fcmp true <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_ps_mask_true_us + // APPLE: entry: + // APPLE-NEXT: ret i16 -1 + // X64-LABEL: test_mm512_cmp_ps_mask_true_us + // X64: entry: + // X64-NEXT: ret i16 -1 return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_US); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask_eq_oq - // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_eq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_lt_os(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_lt_os - // CHECK: [[CMP:%.*]] = fcmp olt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_lt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast 
<16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_le_os(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_le_os - // CHECK: [[CMP:%.*]] = fcmp ole <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_le_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_unord_q(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_unord_q - // CHECK: [[CMP:%.*]] = fcmp uno <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_unord_q + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_uq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_uq - // CHECK: [[CMP:%.*]] = fcmp une <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_neq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_nlt_us(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nlt_us - // CHECK: [[CMP:%.*]] = fcmp uge <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_nlt_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: 
test_mm512_mask_cmp_ps_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US); } __mmask16 test_mm512_mask_cmp_ps_mask_nle_us(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nle_us - // CHECK: [[CMP:%.*]] = fcmp ugt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_nle_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US); } __mmask16 test_mm512_mask_cmp_ps_mask_ord_q(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ord_q - // CHECK: [[CMP:%.*]] = fcmp ord <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_ord_q + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_uq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_uq - // CHECK: [[CMP:%.*]] = fcmp ueq <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_eq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_nge_us(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nge_us - // CHECK: [[CMP:%.*]] = fcmp ult <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_nge_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <16 
x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US); } __mmask16 test_mm512_mask_cmp_ps_mask_ngt_us(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ngt_us - // CHECK: [[CMP:%.*]] = fcmp ule <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_ngt_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ule <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US); } __mmask16 test_mm512_mask_cmp_ps_mask_false_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_false_oq - // CHECK: [[CMP:%.*]] = fcmp false <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_false_oq + // APPLE: entry: + // APPLE-NEXT: ret i16 0 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_false_oq + // X64: entry: + // X64-NEXT: ret i16 0 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_oq - // CHECK: [[CMP:%.*]] = fcmp one <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_neq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_ge_os(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ge_os - // CHECK: [[CMP:%.*]] = fcmp oge <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_ge_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_gt_os(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_gt_os - // CHECK: [[CMP:%.*]] = fcmp ogt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and 
<16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_gt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_true_uq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_true_uq - // CHECK: [[CMP:%.*]] = fcmp true <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_true_uq + // APPLE: entry: + // APPLE-NEXT: ret i16 %m + // X64-LABEL: test_mm512_mask_cmp_ps_mask_true_uq + // X64: entry: + // X64-NEXT: ret i16 %m return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_os(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_os - // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_eq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_lt_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_lt_oq - // CHECK: [[CMP:%.*]] = fcmp olt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_lt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_le_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_le_oq - // CHECK: [[CMP:%.*]] = fcmp ole <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_le_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <16 x float> %a, %b + // X64-NEXT: %1 = bitcast 
i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_unord_s(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_unord_s - // CHECK: [[CMP:%.*]] = fcmp uno <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_unord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_us(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_us - // CHECK: [[CMP:%.*]] = fcmp une <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_neq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US); } __mmask16 test_mm512_mask_cmp_ps_mask_nlt_uq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nlt_uq - // CHECK: [[CMP:%.*]] = fcmp uge <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_nlt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_nle_uq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nle_uq - // CHECK: [[CMP:%.*]] = fcmp ugt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_nle_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // 
X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_ord_s(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ord_s - // CHECK: [[CMP:%.*]] = fcmp ord <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_ord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S); } __mmask16 test_mm512_mask_cmp_ps_mask_eq_us(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_us - // CHECK: [[CMP:%.*]] = fcmp ueq <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_eq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US); } __mmask16 test_mm512_mask_cmp_ps_mask_nge_uq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nge_uq - // CHECK: [[CMP:%.*]] = fcmp ult <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_nge_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_ngt_uq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ngt_uq - // CHECK: [[CMP:%.*]] = fcmp ule <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_ngt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ule <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return 
_mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ); } __mmask16 test_mm512_mask_cmp_ps_mask_false_os(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_false_os - // CHECK: [[CMP:%.*]] = fcmp false <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_false_os + // APPLE: entry: + // APPLE-NEXT: ret i16 0 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_false_os + // X64: entry: + // X64-NEXT: ret i16 0 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_neq_os(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_os - // CHECK: [[CMP:%.*]] = fcmp one <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_neq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS); } __mmask16 test_mm512_mask_cmp_ps_mask_ge_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ge_oq - // CHECK: [[CMP:%.*]] = fcmp oge <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_ge_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_gt_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_gt_oq - // CHECK: [[CMP:%.*]] = fcmp ogt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_gt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %m to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmp_ps_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %m to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ); } __mmask16 test_mm512_mask_cmp_ps_mask_true_us(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_true_us - // CHECK: [[CMP:%.*]] = fcmp true <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_ps_mask_true_us + // APPLE: entry: + // APPLE-NEXT: ret i16 %m + // X64-LABEL: test_mm512_mask_cmp_ps_mask_true_us + // X64: 
entry: + // X64-NEXT: ret i16 %m return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmp_round_pd_mask - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_round_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_round_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_round_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_round_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm512_cmp_pd_mask_eq_oq(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmp_pd_mask_eq_oq - // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_eq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm512_cmp_pd_mask_lt_os(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_lt_os - // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_lt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm512_cmp_pd_mask_le_os(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_le_os - // CHECK: fcmp ole <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_le_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm512_cmp_pd_mask_unord_q(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_unord_q - // CHECK: fcmp uno <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_unord_q + // APPLE: 
entry: + // APPLE-NEXT: %0 = fcmp uno <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm512_cmp_pd_mask_neq_uq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_uq - // CHECK: fcmp une <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_neq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm512_cmp_pd_mask_nlt_us(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_nlt_us - // CHECK: fcmp uge <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_nlt_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm512_cmp_pd_mask_nle_us(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_nle_us - // CHECK: fcmp ugt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_nle_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm512_cmp_pd_mask_ord_q(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_ord_q - // CHECK: fcmp ord <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_ord_q + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm512_cmp_pd_mask_eq_uq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_uq - // CHECK: fcmp ueq <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_eq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm512_cmp_pd_mask_nge_us(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_nge_us - // CHECK: fcmp ult <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_nge_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <8 x 
double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm512_cmp_pd_mask_ngt_us(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_ngt_us - // CHECK: fcmp ule <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_ngt_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ule <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm512_cmp_pd_mask_false_oq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_false_oq - // CHECK: fcmp false <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_false_oq + // APPLE: entry: + // APPLE-NEXT: ret i8 0 + // X64-LABEL: test_mm512_cmp_pd_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm512_cmp_pd_mask_neq_oq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_oq - // CHECK: fcmp one <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_neq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm512_cmp_pd_mask_ge_os(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_ge_os - // CHECK: fcmp oge <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_ge_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm512_cmp_pd_mask_gt_os(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_gt_os - // CHECK: fcmp ogt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_gt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm512_cmp_pd_mask_true_uq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_true_uq - // CHECK: fcmp true <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_true_uq + // APPLE: entry: + // APPLE-NEXT: ret i8 -1 + // X64-LABEL: test_mm512_cmp_pd_mask_true_uq + // X64: entry: + // X64-NEXT: ret i8 -1 return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm512_cmp_pd_mask_eq_os(__m512d a, __m512d b) { - // CHECK-LABEL: 
test_mm512_cmp_pd_mask_eq_os - // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_eq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm512_cmp_pd_mask_lt_oq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_lt_oq - // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_lt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm512_cmp_pd_mask_le_oq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_le_oq - // CHECK: fcmp ole <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_le_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm512_cmp_pd_mask_unord_s(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_unord_s - // CHECK: fcmp uno <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_unord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm512_cmp_pd_mask_neq_us(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_us - // CHECK: fcmp une <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_neq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm512_cmp_pd_mask_nlt_uq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_nlt_uq - // CHECK: fcmp uge <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_nlt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm512_cmp_pd_mask_nle_uq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_nle_uq - // CHECK: fcmp 
ugt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_nle_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm512_cmp_pd_mask_ord_s(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_ord_s - // CHECK: fcmp ord <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_ord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm512_cmp_pd_mask_eq_us(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_us - // CHECK: fcmp ueq <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_eq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm512_cmp_pd_mask_nge_uq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_nge_uq - // CHECK: fcmp ult <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_nge_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm512_cmp_pd_mask_ngt_uq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_ngt_uq - // CHECK: fcmp ule <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_ngt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ule <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm512_cmp_pd_mask_false_os(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_false_os - // CHECK: fcmp false <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_false_os + // APPLE: entry: + // APPLE-NEXT: ret i8 0 + // X64-LABEL: test_mm512_cmp_pd_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm512_cmp_pd_mask_neq_os(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_os - // CHECK: fcmp one <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_neq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // 
X64-LABEL: test_mm512_cmp_pd_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm512_cmp_pd_mask_ge_oq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_ge_oq - // CHECK: fcmp oge <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_ge_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm512_cmp_pd_mask_gt_oq(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_gt_oq - // CHECK: fcmp ogt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_gt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_pd_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmp_pd_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_cmp_pd_mask_true_us - // CHECK: fcmp true <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_pd_mask_true_us + // APPLE: entry: + // APPLE-NEXT: ret i8 -1 + // X64-LABEL: test_mm512_cmp_pd_mask_true_us + // X64: entry: + // X64-NEXT: ret i8 -1 return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask_eq_oq - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_eq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_lt_os(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_lt_os - // CHECK: [[CMP:%.*]] = fcmp olt <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_lt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_le_os(__mmask8 m, __m512d 
a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_le_os - // CHECK: [[CMP:%.*]] = fcmp ole <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_le_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_unord_q(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_unord_q - // CHECK: [[CMP:%.*]] = fcmp uno <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_unord_q + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm512_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_uq - // CHECK: [[CMP:%.*]] = fcmp une <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_neq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nlt_us - // CHECK: [[CMP:%.*]] = fcmp uge <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_nlt_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm512_mask_cmp_pd_mask_nle_us(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nle_us - // CHECK: [[CMP:%.*]] = fcmp ugt <8 x double> %{{.*}}, 
%{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_nle_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm512_mask_cmp_pd_mask_ord_q(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ord_q - // CHECK: [[CMP:%.*]] = fcmp ord <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_ord_q + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_uq - // CHECK: [[CMP:%.*]] = fcmp ueq <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_eq_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_nge_us(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nge_us - // CHECK: [[CMP:%.*]] = fcmp ult <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_nge_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm512_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ngt_us - // CHECK: [[CMP:%.*]] = fcmp ule <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_ngt_us + // APPLE: entry: + // 
APPLE-NEXT: %0 = fcmp ule <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm512_mask_cmp_pd_mask_false_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_false_oq - // CHECK: [[CMP:%.*]] = fcmp false <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_false_oq + // APPLE: entry: + // APPLE-NEXT: ret i8 0 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_oq - // CHECK: [[CMP:%.*]] = fcmp one <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_neq_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_ge_os(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ge_os - // CHECK: [[CMP:%.*]] = fcmp oge <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_ge_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_gt_os(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_gt_os - // CHECK: [[CMP:%.*]] = fcmp ogt <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_gt_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return 
_mm512_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_true_uq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_true_uq - // CHECK: [[CMP:%.*]] = fcmp true <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_true_uq + // APPLE: entry: + // APPLE-NEXT: ret i8 %m + // X64-LABEL: test_mm512_mask_cmp_pd_mask_true_uq + // X64: entry: + // X64-NEXT: ret i8 %m return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_os(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_os - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_eq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_lt_oq - // CHECK: [[CMP:%.*]] = fcmp olt <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_lt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_le_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_le_oq - // CHECK: [[CMP:%.*]] = fcmp ole <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_le_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_unord_s(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_unord_s - // CHECK: [[CMP:%.*]] = fcmp uno <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_unord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + 
// APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm512_mask_cmp_pd_mask_neq_us(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_us - // CHECK: [[CMP:%.*]] = fcmp une <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_neq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm512_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nlt_uq - // CHECK: [[CMP:%.*]] = fcmp uge <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_nlt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nle_uq - // CHECK: [[CMP:%.*]] = fcmp ugt <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_nle_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_ord_s(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ord_s - // CHECK: [[CMP:%.*]] = fcmp ord <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_ord_s + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_ord_s + // 
X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm512_mask_cmp_pd_mask_eq_us(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_us - // CHECK: [[CMP:%.*]] = fcmp ueq <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_eq_us + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm512_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nge_uq - // CHECK: [[CMP:%.*]] = fcmp ult <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_nge_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ult <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ngt_uq - // CHECK: [[CMP:%.*]] = fcmp ule <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_ngt_uq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ule <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm512_mask_cmp_pd_mask_false_os(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_false_os - // CHECK: [[CMP:%.*]] = fcmp false <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_false_os + // APPLE: entry: + // APPLE-NEXT: ret i8 0 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_neq_os(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_os - // CHECK: [[CMP:%.*]] = fcmp one <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x 
i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_neq_os + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp one <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm512_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ge_oq - // CHECK: [[CMP:%.*]] = fcmp oge <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_ge_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_gt_oq - // CHECK: [[CMP:%.*]] = fcmp ogt <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_gt_oq + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm512_mask_cmp_pd_mask_true_us(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_true_us - // CHECK: [[CMP:%.*]] = fcmp true <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask_true_us + // APPLE: entry: + // APPLE-NEXT: ret i8 %m + // X64-LABEL: test_mm512_mask_cmp_pd_mask_true_us + // X64: entry: + // X64-NEXT: ret i8 %m return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %m to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: 
%3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmp_pd_mask(m, a, b, 0); } __mmask8 test_mm512_cmpeq_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpeq_pd_mask - // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpeq_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpeq_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmpeq_pd_mask(a, b); } __mmask16 test_mm512_cmpeq_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpeq_ps_mask - // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpeq_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmpeq_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmpeq_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpeq_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_pd_mask - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpeq_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpeq_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmpeq_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpeq_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_ps_mask - // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpeq_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmpeq_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmpeq_ps_mask(k, a, b); } __mmask8 test_mm512_cmple_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmple_pd_mask - // CHECK: fcmp ole <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmple_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmple_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmple_pd_mask(a, b); } __mmask16 test_mm512_cmple_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmple_ps_mask - // CHECK: 
fcmp ole <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmple_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmple_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ole <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmple_ps_mask(a, b); } __mmask8 test_mm512_mask_cmple_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmple_pd_mask - // CHECK: [[CMP:%.*]] = fcmp ole <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmple_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmple_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmple_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmple_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmple_ps_mask - // CHECK: [[CMP:%.*]] = fcmp ole <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmple_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ole <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmple_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ole <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmple_ps_mask(k, a, b); } __mmask8 test_mm512_cmplt_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmplt_pd_mask - // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmplt_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmplt_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmplt_pd_mask(a, b); } __mmask16 test_mm512_cmplt_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmplt_ps_mask - // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmplt_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmplt_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp olt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmplt_ps_mask(a, b); } __mmask8 test_mm512_mask_cmplt_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_pd_mask - // CHECK: [[CMP:%.*]] = fcmp olt <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmplt_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = 
fcmp olt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmplt_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmplt_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmplt_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_ps_mask - // CHECK: [[CMP:%.*]] = fcmp olt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmplt_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp olt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmplt_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp olt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmplt_ps_mask(k, a, b); } __mmask8 test_mm512_cmpneq_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpneq_pd_mask - // CHECK: fcmp une <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpneq_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpneq_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmpneq_pd_mask(a, b); } __mmask16 test_mm512_cmpneq_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpneq_ps_mask - // CHECK: fcmp une <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpneq_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmpneq_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp une <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmpneq_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpneq_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_pd_mask - // CHECK: [[CMP:%.*]] = fcmp une <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpneq_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpneq_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmpneq_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpneq_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_ps_mask - // CHECK: [[CMP:%.*]] = fcmp une <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // 
APPLE-LABEL: test_mm512_mask_cmpneq_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp une <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmpneq_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp une <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmpneq_ps_mask(k, a, b); } __mmask8 test_mm512_cmpnle_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpnle_pd_mask - // CHECK: fcmp ugt <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpnle_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpnle_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmpnle_pd_mask(a, b); } __mmask16 test_mm512_cmpnle_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpnle_ps_mask - // CHECK: fcmp ugt <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpnle_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmpnle_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmpnle_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpnle_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpnle_pd_mask - // CHECK: [[CMP:%.*]] = fcmp ugt <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpnle_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpnle_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmpnle_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpnle_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpnle_ps_mask - // CHECK: [[CMP:%.*]] = fcmp ugt <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpnle_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmpnle_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmpnle_ps_mask(k, a, b); } __mmask8 test_mm512_cmpnlt_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpnlt_pd_mask - // CHECK: fcmp uge <8 x double> 
%{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpnlt_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpnlt_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmpnlt_pd_mask(a, b); } __mmask16 test_mm512_cmpnlt_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpnlt_ps_mask - // CHECK: fcmp uge <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpnlt_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmpnlt_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmpnlt_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpnlt_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpnlt_pd_mask - // CHECK: [[CMP:%.*]] = fcmp uge <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpnlt_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpnlt_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmpnlt_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpnlt_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpnlt_ps_mask - // CHECK: [[CMP:%.*]] = fcmp uge <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpnlt_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uge <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmpnlt_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uge <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmpnlt_ps_mask(k, a, b); } __mmask8 test_mm512_cmpord_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpord_pd_mask - // CHECK: fcmp ord <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpord_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpord_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmpord_pd_mask(a, b); } __mmask16 test_mm512_cmpord_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpord_ps_mask - // CHECK: fcmp ord <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpord_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // 
APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmpord_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ord <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmpord_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpord_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpord_pd_mask - // CHECK: [[CMP:%.*]] = fcmp ord <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpord_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpord_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmpord_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpord_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpord_ps_mask - // CHECK: [[CMP:%.*]] = fcmp ord <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpord_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp ord <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmpord_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp ord <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmpord_ps_mask(k, a, b); } __mmask8 test_mm512_cmpunord_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpunord_pd_mask - // CHECK: fcmp uno <8 x double> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpunord_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpunord_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x double> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_cmpunord_pd_mask(a, b); } __mmask16 test_mm512_cmpunord_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpunord_ps_mask - // CHECK: fcmp uno <16 x float> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpunord_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // APPLE-NEXT: ret i16 %1 + // X64-LABEL: test_mm512_cmpunord_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uno <16 x float> %a, %b + // X64-NEXT: %1 = bitcast <16 x i1> %0 to i16 + // X64-NEXT: ret i16 %1 return _mm512_cmpunord_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpunord_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpunord_pd_mask - // CHECK: [[CMP:%.*]] = fcmp uno <8 x double> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpunord_pd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <8 x double> %a, %b + // APPLE-NEXT: %1 = bitcast i8 %k to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> 
%2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpunord_pd_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %k to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_cmpunord_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpunord_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpunord_ps_mask - // CHECK: [[CMP:%.*]] = fcmp uno <16 x float> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> [[CMP]], {{.*}} + // APPLE-LABEL: test_mm512_mask_cmpunord_ps_mask + // APPLE: entry: + // APPLE-NEXT: %0 = fcmp uno <16 x float> %a, %b + // APPLE-NEXT: %1 = bitcast i16 %k to <16 x i1> + // APPLE-NEXT: %2 = and <16 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_mask_cmpunord_ps_mask + // X64: entry: + // X64-NEXT: %0 = fcmp uno <16 x float> %a, %b + // X64-NEXT: %1 = bitcast i16 %k to <16 x i1> + // X64-NEXT: %2 = and <16 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return _mm512_mask_cmpunord_ps_mask(k, a, b); } __m256d test_mm512_extractf64x4_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_extractf64x4_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> + // APPLE-LABEL: test_mm512_extractf64x4_pd + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> + // APPLE-NEXT: ret <4 x double> %extract + // X64-LABEL: test_mm512_extractf64x4_pd + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> + // X64-NEXT: ret <4 x double> %extract return _mm512_extractf64x4_pd(a, 1); } __m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){ - // CHECK-LABEL:@test_mm512_mask_extractf64x4_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_extractf64x4_pd + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %1 = select <4 x i1> %extract1, <4 x double> %extract, <4 x double> %__W + // APPLE-NEXT: ret <4 x double> %1 + // X64-LABEL: test_mm512_mask_extractf64x4_pd + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x double> %extract, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1); } __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ - // CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_extractf64x4_pd + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // 
APPLE-NEXT: %1 = select <4 x i1> %extract1, <4 x double> %extract, <4 x double> zeroinitializer + // APPLE-NEXT: ret <4 x double> %1 + // X64-LABEL: test_mm512_maskz_extractf64x4_pd + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x double> %extract, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm512_maskz_extractf64x4_pd( __U, __A, 1); } __m128 test_mm512_extractf32x4_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_extractf32x4_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> + // APPLE-LABEL: test_mm512_extractf32x4_ps + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> + // APPLE-NEXT: ret <4 x float> %extract + // X64-LABEL: test_mm512_extractf32x4_ps + // X64: entry: + // X64-NEXT: %extract = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> + // X64-NEXT: ret <4 x float> %extract return _mm512_extractf32x4_ps(a, 1); } __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){ - // CHECK-LABEL:@test_mm512_mask_extractf32x4_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_extractf32x4_ps + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <16 x float> %__A, <16 x float> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %1 = select <4 x i1> %extract1, <4 x float> %extract, <4 x float> %__W + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm512_mask_extractf32x4_ps + // X64: entry: + // X64-NEXT: %extract = shufflevector <16 x float> %__A, <16 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x float> %extract, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1); } __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512 __A){ - // CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm512_maskz_extractf32x4_ps( __U, __A, 1); + // APPLE-LABEL: test_mm512_maskz_extractf32x4_ps + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <16 x float> %__A, <16 x float> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %1 = select <4 x i1> %extract1, <4 x float> %extract, <4 x float> zeroinitializer + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm512_maskz_extractf32x4_ps + // X64: entry: + // X64-NEXT: %extract = shufflevector <16 x float> %__A, <16 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x float> %extract, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 + return 
_mm512_maskz_extractf32x4_ps(__U, __A, 1); } __mmask16 test_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epu32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpeq_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpeq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpeq_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epu32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpeq_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmpeq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpeq_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epu64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpeq_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpeq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpeq_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epu64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpeq_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpeq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpeq_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epi32_mask - // CHECK: icmp sge <16 x i32> %{{.*}}, 
%{{.*}} + // APPLE-LABEL: test_mm512_cmpge_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp sge <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpge_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp sge <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpge_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epi32_mask - // CHECK: icmp sge <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpge_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp sge <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmpge_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp sge <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpge_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epi64_mask - // CHECK: icmp sge <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpge_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sge <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpge_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sge <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpge_epi64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epi64_mask - // CHECK: icmp sge <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpge_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sge <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpge_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sge <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpge_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epu32_mask - // CHECK: icmp uge <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpge_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + 
// APPLE-NEXT: %2 = icmp uge <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpge_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp uge <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpge_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epu32_mask - // CHECK: icmp uge <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpge_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp uge <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmpge_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp uge <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpge_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epu64_mask - // CHECK: icmp uge <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpge_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp uge <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpge_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp uge <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpge_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epu64_mask - // CHECK: icmp uge <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpge_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp uge <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpge_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp uge <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpge_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpgt_epu32_mask - // CHECK: icmp ugt <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpgt_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpgt_epu32_mask + // X64: entry: + // 
X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpgt_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epu32_mask - // CHECK: icmp ugt <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpgt_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmpgt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpgt_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpgt_epu64_mask - // CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpgt_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ugt <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpgt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ugt <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpgt_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epu64_mask - // CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpgt_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ugt <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpgt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ugt <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpgt_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epi32_mask - // CHECK: icmp sle <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmple_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp sle <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmple_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp sle <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x 
i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmple_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmple_epi32_mask - // CHECK: icmp sle <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmple_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp sle <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmple_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp sle <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmple_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epi64_mask - // CHECK: icmp sle <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmple_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sle <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmple_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sle <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmple_epi64_mask(__a, __b); } __mmask8 test_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmple_epi64_mask - // CHECK: icmp sle <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmple_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sle <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmple_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sle <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmple_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epu32_mask - // CHECK: icmp ule <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmple_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ule <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmple_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ule <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmple_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // 
CHECK-LABEL: @test_mm512_mask_cmple_epu32_mask - // CHECK: icmp ule <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmple_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ule <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmple_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ule <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmple_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epu64_mask - // CHECK: icmp ule <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmple_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ule <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmple_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ule <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmple_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmple_epu64_mask - // CHECK: icmp ule <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmple_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ule <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmple_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ule <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmple_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epi32_mask - // CHECK: icmp slt <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmplt_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmplt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmplt_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epi32_mask - // CHECK: icmp slt <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmplt_epi32_mask + 
// APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmplt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmplt_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epi64_mask - // CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmplt_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp slt <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmplt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp slt <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmplt_epi64_mask(__a, __b); } __mmask8 test_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epi64_mask - // CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmplt_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp slt <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmplt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp slt <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmplt_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epu32_mask - // CHECK: icmp ult <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmplt_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmplt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmplt_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epu32_mask - // CHECK: icmp ult <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmplt_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // 
APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmplt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmplt_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epu64_mask - // CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmplt_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ult <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmplt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ult <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmplt_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epu64_mask - // CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmplt_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ult <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmplt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ult <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmplt_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpneq_epi32_mask - // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpneq_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpneq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpneq_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epi32_mask - // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpneq_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: 
test_mm512_mask_cmpneq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpneq_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpneq_epi64_mask - // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpneq_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpneq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpneq_epi64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epi64_mask - // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpneq_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpneq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpneq_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpneq_epu32_mask - // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpneq_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmpneq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmpneq_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epu32_mask - // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpneq_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmpneq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = 
icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmpneq_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpneq_epu64_mask - // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmpneq_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmpneq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmpneq_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epu64_mask - // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmpneq_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmpneq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmpneq_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmp_eq_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmp_eq_epi32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_eq_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmp_eq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmp_epi32_mask(__a, __b, _MM_CMPINT_EQ); } __mmask16 test_mm512_mask_cmp_eq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_eq_epi32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_eq_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmp_eq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // 
X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmp_epi32_mask(__u, __a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm512_cmp_eq_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmp_eq_epi64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_eq_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_eq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmp_epi64_mask(__a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm512_mask_cmp_eq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_eq_epi64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_eq_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_eq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmp_epi64_mask(__u, __a, __b, _MM_CMPINT_EQ); } __mmask16 test_mm512_cmp_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmp_epu32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: ret i16 %3 + // X64-LABEL: test_mm512_cmp_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: ret i16 %3 return (__mmask16)_mm512_cmp_epu32_mask(__a, __b, 0); } __mmask16 test_mm512_mask_cmp_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_epu32_mask - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_epu32_mask + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // APPLE-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_mm512_mask_cmp_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__a to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__b to <16 x i32> + // X64-NEXT: %2 = icmp eq <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return (__mmask16)_mm512_mask_cmp_epu32_mask(__u, __a, __b, 0); } __mmask8 test_mm512_cmp_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: 
@test_mm512_cmp_epu64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_cmp_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_cmp_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return (__mmask8)_mm512_cmp_epu64_mask(__a, __b, 0); } __mmask8 test_mm512_mask_cmp_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_epu64_mask - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_cmp_epu64_mask + // APPLE: entry: + // APPLE-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // APPLE-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_cmp_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <8 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm512_mask_cmp_epu64_mask(__u, __a, __b, 0); } __m512i test_mm512_mask_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_and_epi32 - // CHECK: and <16 x i32> - // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_and_epi32 + // APPLE: entry: + // APPLE-NEXT: %and1.i.i = and <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__src to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i.i = and <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__src to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_and_epi32(__src, __k,__a, __b); } __m512i test_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_and_epi32 - // CHECK: and <16 x i32> - // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_and_epi32 + // APPLE: entry: + // APPLE-NEXT: %and1.i.i.i = and <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast <8 x i64> %and1.i.i.i to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i.i.i = and <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast <8 x i64> %and1.i.i.i to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %2 = select <16 
x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_and_epi32(__k,__a, __b); } __m512i test_mm512_mask_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_and_epi64 - // CHECK: %[[AND_RES:.*]] = and <8 x i64> - // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_and_epi64 + // APPLE: entry: + // APPLE-NEXT: %and.i.i = and <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_and_epi64 + // X64: entry: + // X64-NEXT: %and.i.i = and <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_and_epi64(__src, __k,__a, __b); } __m512i test_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_and_epi64 - // CHECK: %[[AND_RES:.*]] = and <8 x i64> - // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_and_epi64 + // APPLE: entry: + // APPLE-NEXT: %and.i.i.i = and <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_and_epi64 + // X64: entry: + // X64-NEXT: %and.i.i.i = and <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_and_epi64(__k,__a, __b); } __m512i test_mm512_mask_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_or_epi32 - // CHECK: or <16 x i32> - // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_or_epi32 + // APPLE: entry: + // APPLE-NEXT: %or1.i.i = or <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast <8 x i64> %or1.i.i to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__src to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i.i = or <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast <8 x i64> %or1.i.i to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__src to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_or_epi32(__src, __k,__a, __b); } __m512i test_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_or_epi32 - // CHECK: or <16 x i32> - // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> 
%{{.*}} + // APPLE-LABEL: test_mm512_maskz_or_epi32 + // APPLE: entry: + // APPLE-NEXT: %or1.i.i.i = or <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast <8 x i64> %or1.i.i.i to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i.i.i = or <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast <8 x i64> %or1.i.i.i to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_or_epi32(__k,__a, __b); } __m512i test_mm512_mask_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_or_epi64 - // CHECK: %[[OR_RES:.*]] = or <8 x i64> - // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[OR_RES]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_or_epi64 + // APPLE: entry: + // APPLE-NEXT: %or.i.i = or <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %or.i.i, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_or_epi64 + // X64: entry: + // X64-NEXT: %or.i.i = or <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %or.i.i, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_or_epi64(__src, __k,__a, __b); } __m512i test_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_or_epi64 - // CHECK: %[[OR_RES:.*]] = or <8 x i64> - // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[OR_RES]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_or_epi64 + // APPLE: entry: + // APPLE-NEXT: %or.i.i.i = or <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %or.i.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_or_epi64 + // X64: entry: + // X64-NEXT: %or.i.i.i = or <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %or.i.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_or_epi64(__k,__a, __b); } __m512i test_mm512_mask_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_xor_epi32 - // CHECK: xor <16 x i32> - // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_xor_epi32 + // APPLE: entry: + // APPLE-NEXT: %xor1.i.i = xor <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__src to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i.i = xor <8 x i64> %__b, %__a + // 
X64-NEXT: %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__src to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_xor_epi32(__src, __k,__a, __b); } __m512i test_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_xor_epi32 - // CHECK: xor <16 x i32> - // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_xor_epi32 + // APPLE: entry: + // APPLE-NEXT: %xor1.i.i.i = xor <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast <8 x i64> %xor1.i.i.i to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i.i.i = xor <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast <8 x i64> %xor1.i.i.i to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_xor_epi32(__k,__a, __b); } __m512i test_mm512_mask_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_xor_epi64 - // CHECK: %[[XOR_RES:.*]] = xor <8 x i64> - // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[XOR_RES]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_xor_epi64 + // APPLE: entry: + // APPLE-NEXT: %xor.i.i = xor <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %xor.i.i, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i.i = xor <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %xor.i.i, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_xor_epi64(__src, __k,__a, __b); } __m512i test_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_xor_epi64 - // CHECK: %[[XOR_RES:.*]] = xor <8 x i64> - // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[XOR_RES]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_xor_epi64 + // APPLE: entry: + // APPLE-NEXT: %xor.i.i.i = xor <8 x i64> %__b, %__a + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %xor.i.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i.i.i = xor <8 x i64> %__b, %__a + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %xor.i.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_xor_epi64(__k,__a, __b); } __m512i test_mm512_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_and_epi32 - // CHECK: and <16 x i32> + // APPLE-LABEL: 
test_mm512_and_epi32 + // APPLE: entry: + // APPLE-NEXT: %and1.i = and <8 x i64> %__b, %__a + // APPLE-NEXT: ret <8 x i64> %and1.i + // X64-LABEL: test_mm512_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i = and <8 x i64> %__b, %__a + // X64-NEXT: ret <8 x i64> %and1.i return _mm512_and_epi32(__a, __b); } __m512i test_mm512_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_and_epi64 - // CHECK: and <8 x i64> + // APPLE-LABEL: test_mm512_and_epi64 + // APPLE: entry: + // APPLE-NEXT: %and.i = and <8 x i64> %__b, %__a + // APPLE-NEXT: ret <8 x i64> %and.i + // X64-LABEL: test_mm512_and_epi64 + // X64: entry: + // X64-NEXT: %and.i = and <8 x i64> %__b, %__a + // X64-NEXT: ret <8 x i64> %and.i return _mm512_and_epi64(__a, __b); } __m512i test_mm512_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_or_epi32 - // CHECK: or <16 x i32> + // APPLE-LABEL: test_mm512_or_epi32 + // APPLE: entry: + // APPLE-NEXT: %or1.i = or <8 x i64> %__b, %__a + // APPLE-NEXT: ret <8 x i64> %or1.i + // X64-LABEL: test_mm512_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i = or <8 x i64> %__b, %__a + // X64-NEXT: ret <8 x i64> %or1.i return _mm512_or_epi32(__a, __b); } __m512i test_mm512_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_or_epi64 - // CHECK: or <8 x i64> + // APPLE-LABEL: test_mm512_or_epi64 + // APPLE: entry: + // APPLE-NEXT: %or.i = or <8 x i64> %__b, %__a + // APPLE-NEXT: ret <8 x i64> %or.i + // X64-LABEL: test_mm512_or_epi64 + // X64: entry: + // X64-NEXT: %or.i = or <8 x i64> %__b, %__a + // X64-NEXT: ret <8 x i64> %or.i return _mm512_or_epi64(__a, __b); } __m512i test_mm512_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_xor_epi32 - // CHECK: xor <16 x i32> + // APPLE-LABEL: test_mm512_xor_epi32 + // APPLE: entry: + // APPLE-NEXT: %xor1.i = xor <8 x i64> %__b, %__a + // APPLE-NEXT: ret <8 x i64> %xor1.i + // X64-LABEL: test_mm512_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i = xor <8 x i64> %__b, %__a + // X64-NEXT: ret <8 x i64> %xor1.i return _mm512_xor_epi32(__a, __b); } __m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_xor_epi64 - // CHECK: xor <8 x i64> + // APPLE-LABEL: test_mm512_xor_epi64 + // APPLE: entry: + // APPLE-NEXT: %xor.i = xor <8 x i64> %__b, %__a + // APPLE-NEXT: ret <8 x i64> %xor.i + // X64-LABEL: test_mm512_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i = xor <8 x i64> %__b, %__a + // X64-NEXT: ret <8 x i64> %xor.i return _mm512_xor_epi64(__a, __b); } __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ - // CHECK-LABEL: @test_mm512_maskz_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_andnot_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = xor <8 x i64> %__A, + // APPLE-NEXT: %1 = and <8 x i64> %0, %__B + // APPLE-NEXT: %2 = bitcast <8 x i64> %1 to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <8 x i64> %__A, + // X64-NEXT: %1 = and <8 x i64> %0, %__B + // X64-NEXT: 
%2 = bitcast <8 x i64> %1 to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_andnot_epi32(__k,__A,__B); } __m512i test_mm512_mask_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { - // CHECK-LABEL: @test_mm512_mask_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_andnot_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = xor <8 x i64> %__A, + // APPLE-NEXT: %1 = and <8 x i64> %0, %__B + // APPLE-NEXT: %2 = bitcast <8 x i64> %1 to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__src to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <8 x i64> %__A, + // X64-NEXT: %1 = and <8 x i64> %0, %__B + // X64-NEXT: %2 = bitcast <8 x i64> %1 to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__src to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_andnot_epi32(__src,__k,__A,__B); } __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_andnot_si512 - //CHECK: load {{.*}}%__A.addr.i, align 64 - //CHECK: %neg.i = xor{{.*}}, - //CHECK: load {{.*}}%__B.addr.i, align 64 - //CHECK: and <8 x i64> %neg.i,{{.*}} + // APPLE-LABEL: test_mm512_andnot_si512 + // APPLE: entry: + // APPLE-NEXT: %neg.i = xor <8 x i64> %__A, + // APPLE-NEXT: %and.i = and <8 x i64> %neg.i, %__B + // APPLE-NEXT: ret <8 x i64> %and.i + // X64-LABEL: test_mm512_andnot_si512 + // X64: entry: + // X64-NEXT: %neg.i = xor <8 x i64> %__A, + // X64-NEXT: %and.i = and <8 x i64> %neg.i, %__B + // X64-NEXT: ret <8 x i64> %and.i return _mm512_andnot_si512(__A, __B); } __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_andnot_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = xor <8 x i64> %__A, + // APPLE-NEXT: %1 = and <8 x i64> %0, %__B + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <8 x i64> %__A, + // X64-NEXT: %1 = and <8 x i64> %0, %__B + // X64-NEXT: ret <8 x i64> %1 return _mm512_andnot_epi32(__A,__B); } __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, - // CHECK: and <8 x i64> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_andnot_epi64 + // APPLE: entry: + // APPLE-NEXT: %neg.i.i.i = xor <8 x i64> %__A, + // APPLE-NEXT: %and.i.i.i = and <8 x i64> %neg.i.i.i, %__B + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: 
test_mm512_maskz_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i.i.i = xor <8 x i64> %__A, + // X64-NEXT: %and.i.i.i = and <8 x i64> %neg.i.i.i, %__B + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_andnot_epi64(__k,__A,__B); } __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, - // CHECK: and <8 x i64> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_andnot_epi64 + // APPLE: entry: + // APPLE-NEXT: %neg.i.i = xor <8 x i64> %__A, + // APPLE-NEXT: %and.i.i = and <8 x i64> %neg.i.i, %__B + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i.i = xor <8 x i64> %__A, + // X64-NEXT: %and.i.i = and <8 x i64> %neg.i.i, %__B + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %and.i.i, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_andnot_epi64(__src,__k,__A,__B); } __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, - // CHECK: and <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_andnot_epi64 + // APPLE: entry: + // APPLE-NEXT: %neg.i = xor <8 x i64> %__A, + // APPLE-NEXT: %and.i = and <8 x i64> %neg.i, %__B + // APPLE-NEXT: ret <8 x i64> %and.i + // X64-LABEL: test_mm512_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i = xor <8 x i64> %__A, + // X64-NEXT: %and.i = and <8 x i64> %neg.i, %__B + // X64-NEXT: ret <8 x i64> %and.i return _mm512_andnot_epi64(__A,__B); } __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_sub_epi32 - //CHECK: sub <16 x i32> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sub_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %sub.i.i = sub <16 x i32> %0, %1 + // APPLE-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %sub.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_sub_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %sub.i.i = sub <16 x i32> %0, %1 + // X64-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %sub.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_sub_epi32(__k,__A,__B); } __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_sub_epi32 - //CHECK: sub <16 x i32> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sub_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x 
i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %sub.i.i = sub <16 x i32> %0, %1 + // APPLE-NEXT: %2 = bitcast <8 x i64> %__src to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %sub.i.i, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_sub_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %sub.i.i = sub <16 x i32> %0, %1 + // X64-NEXT: %2 = bitcast <8 x i64> %__src to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %sub.i.i, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_sub_epi32(__src,__k,__A,__B); } __m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_sub_epi32 - //CHECK: sub <16 x i32> + // APPLE-LABEL: test_mm512_sub_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %sub.i = sub <16 x i32> %0, %1 + // APPLE-NEXT: %2 = bitcast <16 x i32> %sub.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_sub_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %sub.i = sub <16 x i32> %0, %1 + // X64-NEXT: %2 = bitcast <16 x i32> %sub.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_sub_epi32(__A,__B); } __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_sub_epi64 - //CHECK: sub <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sub_epi64 + // APPLE: entry: + // APPLE-NEXT: %sub.i.i = sub <8 x i64> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %sub.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_sub_epi64 + // X64: entry: + // X64-NEXT: %sub.i.i = sub <8 x i64> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %sub.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_sub_epi64(__k,__A,__B); } __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_sub_epi64 - //CHECK: sub <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sub_epi64 + // APPLE: entry: + // APPLE-NEXT: %sub.i.i = sub <8 x i64> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %sub.i.i, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_sub_epi64 + // X64: entry: + // X64-NEXT: %sub.i.i = sub <8 x i64> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %sub.i.i, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_sub_epi64(__src,__k,__A,__B); } __m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_sub_epi64 - //CHECK: sub <8 x i64> + // 
APPLE-LABEL: test_mm512_sub_epi64 + // APPLE: entry: + // APPLE-NEXT: %sub.i = sub <8 x i64> %__A, %__B + // APPLE-NEXT: ret <8 x i64> %sub.i + // X64-LABEL: test_mm512_sub_epi64 + // X64: entry: + // X64-NEXT: %sub.i = sub <8 x i64> %__A, %__B + // X64-NEXT: ret <8 x i64> %sub.i return _mm512_sub_epi64(__A,__B); } __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_add_epi32 - //CHECK: add <16 x i32> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_add_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %add.i.i = add <16 x i32> %1, %0 + // APPLE-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %add.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_add_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %add.i.i = add <16 x i32> %1, %0 + // X64-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %add.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_add_epi32(__k,__A,__B); } __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_add_epi32 - //CHECK: add <16 x i32> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_add_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %add.i.i = add <16 x i32> %1, %0 + // APPLE-NEXT: %2 = bitcast <8 x i64> %__src to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %add.i.i, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_add_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %add.i.i = add <16 x i32> %1, %0 + // X64-NEXT: %2 = bitcast <8 x i64> %__src to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %add.i.i, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_add_epi32(__src,__k,__A,__B); } __m512i test_mm512_add_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_add_epi32 - //CHECK: add <16 x i32> + // APPLE-LABEL: test_mm512_add_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %add.i = add <16 x i32> %1, %0 + // APPLE-NEXT: %2 = bitcast <16 x i32> %add.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_add_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %add.i = add <16 x i32> %1, %0 + // X64-NEXT: %2 = bitcast <16 x i32> %add.i to <8 x i64> + // 
X64-NEXT: ret <8 x i64> %2 return _mm512_add_epi32(__A,__B); } __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_add_epi64 - //CHECK: add <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_add_epi64 + // APPLE: entry: + // APPLE-NEXT: %add.i.i = add <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %add.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_add_epi64 + // X64: entry: + // X64-NEXT: %add.i.i = add <8 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %add.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_add_epi64(__k,__A,__B); } __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_add_epi64 - //CHECK: add <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_add_epi64 + // APPLE: entry: + // APPLE-NEXT: %add.i.i = add <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %add.i.i, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_add_epi64 + // X64: entry: + // X64-NEXT: %add.i.i = add <8 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %add.i.i, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_add_epi64(__src,__k,__A,__B); } __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_add_epi64 - //CHECK: add <8 x i64> + // APPLE-LABEL: test_mm512_add_epi64 + // APPLE: entry: + // APPLE-NEXT: %add.i = add <8 x i64> %__B, %__A + // APPLE-NEXT: ret <8 x i64> %add.i + // X64-LABEL: test_mm512_add_epi64 + // X64: entry: + // X64-NEXT: %add.i = add <8 x i64> %__B, %__A + // X64-NEXT: ret <8 x i64> %add.i return _mm512_add_epi64(__A,__B); } __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mul_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = shl <8 x i64> %__A, + // APPLE-NEXT: %1 = ashr exact <8 x i64> %0, + // APPLE-NEXT: %2 = shl <8 x i64> %__B, + // APPLE-NEXT: %3 = ashr exact <8 x i64> %2, + // APPLE-NEXT: %4 = mul nsw <8 x i64> %3, %1 + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mul_epi32 + // X64: entry: + // X64-NEXT: %0 = shl <8 x i64> %__A, + // X64-NEXT: %1 = ashr exact <8 x i64> %0, + // X64-NEXT: %2 = shl <8 x i64> %__B, + // X64-NEXT: %3 = ashr exact <8 x i64> %2, + // X64-NEXT: %4 = mul nsw <8 x i64> %3, %1 + // X64-NEXT: ret <8 x i64> %4 return _mm512_mul_epi32(__A,__B); } __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mul_epi32 + 
// APPLE: entry: + // APPLE-NEXT: %0 = shl <8 x i64> %__A, + // APPLE-NEXT: %1 = ashr exact <8 x i64> %0, + // APPLE-NEXT: %2 = shl <8 x i64> %__B, + // APPLE-NEXT: %3 = ashr exact <8 x i64> %2, + // APPLE-NEXT: %4 = mul nsw <8 x i64> %3, %1 + // APPLE-NEXT: %5 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_mul_epi32 + // X64: entry: + // X64-NEXT: %0 = shl <8 x i64> %__A, + // X64-NEXT: %1 = ashr exact <8 x i64> %0, + // X64-NEXT: %2 = shl <8 x i64> %__B, + // X64-NEXT: %3 = ashr exact <8 x i64> %2, + // X64-NEXT: %4 = mul nsw <8 x i64> %3, %1 + // X64-NEXT: %5 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_mul_epi32(__k,__A,__B); } __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mul_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = shl <8 x i64> %__A, + // APPLE-NEXT: %1 = ashr exact <8 x i64> %0, + // APPLE-NEXT: %2 = shl <8 x i64> %__B, + // APPLE-NEXT: %3 = ashr exact <8 x i64> %2, + // APPLE-NEXT: %4 = mul nsw <8 x i64> %3, %1 + // APPLE-NEXT: %5 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_mul_epi32 + // X64: entry: + // X64-NEXT: %0 = shl <8 x i64> %__A, + // X64-NEXT: %1 = ashr exact <8 x i64> %0, + // X64-NEXT: %2 = shl <8 x i64> %__B, + // X64-NEXT: %3 = ashr exact <8 x i64> %2, + // X64-NEXT: %4 = mul nsw <8 x i64> %3, %1 + // X64-NEXT: %5 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_mul_epi32(__src,__k,__A,__B); } __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mul_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = and <8 x i64> %__A, + // APPLE-NEXT: %1 = and <8 x i64> %__B, + // APPLE-NEXT: %2 = mul nuw <8 x i64> %1, %0 + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mul_epu32 + // X64: entry: + // X64-NEXT: %0 = and <8 x i64> %__A, + // X64-NEXT: %1 = and <8 x i64> %__B, + // X64-NEXT: %2 = mul nuw <8 x i64> %1, %0 + // X64-NEXT: ret <8 x i64> %2 return _mm512_mul_epu32(__A,__B); } __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mul_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = and <8 x i64> %__A, + // APPLE-NEXT: %1 = and <8 x i64> %__B, + // APPLE-NEXT: %2 = mul nuw <8 x i64> %1, %0 + // APPLE-NEXT: %3 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %4 + // 
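The shl / ashr exact pairs in the mul_epi32 checks above are how the low 32 bits of each 64-bit lane are sign-extended (a shift by 32 in each direction) before the widening multiply. A per-lane sketch in plain C, with an illustrative helper name that is not part of the test:

#include <stdint.h>

/* Per-lane model of _mm512_mul_epi32: sign-extend the low 32 bits of each
   64-bit lane of a and b, then form the full 64-bit signed product. */
static int64_t mul_epi32_lane(uint64_t a, uint64_t b) {
  int64_t lo_a = (int32_t)(uint32_t)a; /* sign-extend low half of a */
  int64_t lo_b = (int32_t)(uint32_t)b; /* sign-extend low half of b */
  return lo_a * lo_b;                  /* cannot overflow int64_t */
}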
X64-LABEL: test_mm512_maskz_mul_epu32 + // X64: entry: + // X64-NEXT: %0 = and <8 x i64> %__A, + // X64-NEXT: %1 = and <8 x i64> %__B, + // X64-NEXT: %2 = mul nuw <8 x i64> %1, %0 + // X64-NEXT: %3 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_mul_epu32(__k,__A,__B); } __m512i test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mul_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = and <8 x i64> %__A, + // APPLE-NEXT: %1 = and <8 x i64> %__B, + // APPLE-NEXT: %2 = mul nuw <8 x i64> %1, %0 + // APPLE-NEXT: %3 = bitcast i8 %__k to <8 x i1> + // APPLE-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %__src + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_mul_epu32 + // X64: entry: + // X64-NEXT: %0 = and <8 x i64> %__A, + // X64-NEXT: %1 = and <8 x i64> %__B, + // X64-NEXT: %2 = mul nuw <8 x i64> %1, %0 + // X64-NEXT: %3 = bitcast i8 %__k to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %__src + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_mul_epu32(__src,__k,__A,__B); } __m512i test_mm512_maskz_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_mullo_epi32 - //CHECK: mul <16 x i32> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mullo_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %mul.i.i = mul <16 x i32> %1, %0 + // APPLE-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %mul.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_mullo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %mul.i.i = mul <16 x i32> %1, %0 + // X64-NEXT: %2 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %mul.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_mullo_epi32(__k,__A,__B); } __m512i test_mm512_mask_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_mullo_epi32 - //CHECK: mul <16 x i32> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mullo_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %mul.i.i = mul <16 x i32> %1, %0 + // APPLE-NEXT: %2 = bitcast <8 x i64> %__src to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %mul.i.i, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_mullo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // 
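The unsigned counterpart checked above uses an and with a low-32-bit mask instead of the shift pair: each 64-bit lane is zero-extended from its low half before the widening multiply (the mul nuw in the IR). A per-lane sketch, again with an illustrative helper name:

#include <stdint.h>

/* Per-lane model of _mm512_mul_epu32: keep only the low 32 bits of each
   64-bit lane (the `and` in the checks) and multiply as unsigned values. */
static uint64_t mul_epu32_lane(uint64_t a, uint64_t b) {
  return (a & 0xffffffffULL) * (b & 0xffffffffULL); /* fits in 64 bits */
}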
X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %mul.i.i = mul <16 x i32> %1, %0 + // X64-NEXT: %2 = bitcast <8 x i64> %__src to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__k to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %mul.i.i, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_mullo_epi32(__src,__k,__A,__B); } __m512i test_mm512_mullo_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_mullo_epi32 - //CHECK: mul <16 x i32> + // APPLE-LABEL: test_mm512_mullo_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %mul.i = mul <16 x i32> %1, %0 + // APPLE-NEXT: %2 = bitcast <16 x i32> %mul.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mullo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %mul.i = mul <16 x i32> %1, %0 + // X64-NEXT: %2 = bitcast <16 x i32> %mul.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_mullo_epi32(__A,__B); } __m512i test_mm512_mullox_epi64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mullox_epi64 - // CHECK: mul <8 x i64> + // APPLE-LABEL: test_mm512_mullox_epi64 + // APPLE: entry: + // APPLE-NEXT: %mul.i = mul <8 x i64> %__B, %__A + // APPLE-NEXT: ret <8 x i64> %mul.i + // X64-LABEL: test_mm512_mullox_epi64 + // X64: entry: + // X64-NEXT: %mul.i = mul <8 x i64> %__B, %__A + // X64-NEXT: ret <8 x i64> %mul.i return (__m512i) _mm512_mullox_epi64(__A, __B); } __m512i test_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_mullox_epi64 - // CHECK: mul <8 x i64> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mullox_epi64 + // APPLE: entry: + // APPLE-NEXT: %mul.i.i = mul <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %mul.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_mullox_epi64 + // X64: entry: + // X64-NEXT: %mul.i.i = mul <8 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %mul.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %1 return (__m512i) _mm512_mask_mullox_epi64(__W, __U, __A, __B); } __m512d test_mm512_add_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_add_round_pd - // CHECK: @llvm.x86.avx512.add.pd.512 + // APPLE-LABEL: test_mm512_add_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_add_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_add_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_add_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_add_round_pd - // CHECK: @llvm.x86.avx512.add.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_add_round_pd + // APPLE: entry: + // APPLE-NEXT: 
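Because __m512i is modelled in IR as <8 x i64>, the mullo_epi32 checks above begin and end with bitcasts to and from <16 x i32>, and the mask widens to a 16-bit __mmask16 bitcast to <16 x i1>. A scalar sketch of the zero-masked form, using an ordinary loop in place of the vector types (names illustrative, not from the test):

#include <stdint.h>

/* Scalar model of _mm512_maskz_mullo_epi32: 16 32-bit lanes, each the
   truncated (wrap-around) product when the corresponding mask bit is set,
   and zero otherwise.  Signedness does not change the low 32 bits. */
static void maskz_mullo_epi32_model(uint16_t k, const uint32_t a[16],
                                    const uint32_t b[16], uint32_t out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = ((k >> i) & 1) ? a[i] * b[i] : 0u;
}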
%0 = tail call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_add_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_add_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_add_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_add_round_pd - // CHECK: @llvm.x86.avx512.add.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_add_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_add_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_add_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_add_pd - // CHECK: fadd <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_add_pd + // APPLE: entry: + // APPLE-NEXT: %add.i.i = fadd <8 x double> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %add.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_add_pd + // X64: entry: + // X64-NEXT: %add.i.i = fadd <8 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %add.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_add_pd(__W,__U,__A,__B); } __m512d test_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_add_pd - // CHECK: fadd <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_add_pd + // APPLE: entry: + // APPLE-NEXT: %add.i.i = fadd <8 x double> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %add.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_add_pd + // X64: entry: + // X64-NEXT: %add.i.i = fadd <8 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %add.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_add_pd(__U,__A,__B); } __m512 test_mm512_add_round_ps(__m512 __A, __m512 __B) { - // 
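Every add.pd.512 call in the checks above ends with a literal i32 8: that is just the numeric value of the rounding argument the source passes, since _MM_FROUND_TO_NEAREST_INT is 0x00 and _MM_FROUND_NO_EXC is 0x08 in Clang's headers, so their OR folds to 8. A tiny compile-time sketch; the enum names here are illustrative stand-ins for those macros, not definitions from the headers.

/* The rounding-control operand seen as `i32 8` in the IR checks:
   round-to-nearest (0x00) combined with suppress-all-exceptions (0x08). */
enum { ROUND_TO_NEAREST = 0x00, SUPPRESS_ALL_EXC = 0x08 };
_Static_assert((ROUND_TO_NEAREST | SUPPRESS_ALL_EXC) == 8,
               "matches the literal rounding immediate in the checks above");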
CHECK-LABEL: @test_mm512_add_round_ps - // CHECK: @llvm.x86.avx512.add.ps.512 + // APPLE-LABEL: test_mm512_add_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_add_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_add_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_add_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_add_round_ps - // CHECK: @llvm.x86.avx512.add.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_add_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_add_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_add_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_add_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_add_round_ps - // CHECK: @llvm.x86.avx512.add.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_add_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_add_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_add_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_add_ps - // CHECK: fadd <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_add_ps + // APPLE: entry: + // APPLE-NEXT: %add.i.i = fadd <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %add.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_add_ps + // X64: entry: + // X64-NEXT: %add.i.i = fadd <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %add.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_add_ps(__W,__U,__A,__B); 
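The masked packed-float tests above follow the same select idiom as the integer ones; the only structural difference is the mask width, since 16 float lanes need a 16-bit __mmask16 bitcast to <16 x i1>. A compact scalar model of _mm512_mask_add_ps (helper name illustrative):

/* Scalar model of _mm512_mask_add_ps: 16 float lanes selected by a
   16-bit mask; unselected lanes keep the pass-through value w. */
static void mask_add_ps_model(unsigned short k, const float w[16],
                              const float a[16], const float b[16],
                              float out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = ((k >> i) & 1) ? a[i] + b[i] : w[i];
}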
} __m512 test_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_add_ps - // CHECK: fadd <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_add_ps + // APPLE: entry: + // APPLE-NEXT: %add.i.i = fadd <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %add.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_add_ps + // X64: entry: + // X64-NEXT: %add.i.i = fadd <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %add.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_add_ps(__U,__A,__B); } __m128 test_mm_add_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_add_round_ss - // CHECK: @llvm.x86.avx512.mask.add.ss.round + // APPLE-LABEL: test_mm_add_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_add_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_add_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_add_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_add_round_ss - // CHECK: @llvm.x86.avx512.mask.add.ss.round + // APPLE-LABEL: test_mm_mask_add_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_add_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_add_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_add_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_add_round_ss - // CHECK: @llvm.x86.avx512.mask.add.ss.round + // APPLE-LABEL: test_mm_maskz_add_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_add_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_add_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_add_ss - // CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fadd float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - 
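Unlike the packed forms, the scalar *_round_ss tests above keep the whole operation inside the llvm.x86.avx512.mask.add.ss.round call: the write-mask and the pass-through vector are ordinary call operands. Semantically only element 0 is computed and masked, and the upper three elements come from the first source. A rough model of _mm_mask_add_round_ss, ignoring the rounding override, which a scalar model cannot reproduce (names illustrative):

/* Model of _mm_mask_add_round_ss(W, U, A, B): lane 0 is masked against
   W's lane 0; lanes 1..3 are copied from A unchanged. */
static void mask_add_round_ss_model(float w0, unsigned char u,
                                    const float a[4], const float b[4],
                                    float out[4]) {
  out[0] = (u & 1) ? a[0] + b[0] : w0;
  out[1] = a[1];
  out[2] = a[2];
  out[3] = a[3];
}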
// CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_add_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %add.i.i = fadd float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, float %add.i.i, float %0 + // APPLE-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // APPLE-NEXT: ret <4 x float> %4 + // X64-LABEL: test_mm_mask_add_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %add.i.i = fadd float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, float %add.i.i, float %0 + // X64-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // X64-NEXT: ret <4 x float> %4 return _mm_mask_add_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_add_ss - // CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fadd float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_add_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %add.i.i = fadd float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, float %add.i.i, float 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // APPLE-NEXT: ret <4 x float> %3 + // X64-LABEL: test_mm_maskz_add_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %add.i.i = fadd float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, float %add.i.i, float 0.000000e+00 + // X64-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // X64-NEXT: ret <4 x float> %3 return _mm_maskz_add_ss(__U,__A,__B); } __m128d test_mm_add_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_add_round_sd - // CHECK: @llvm.x86.avx512.mask.add.sd.round + // APPLE-LABEL: test_mm_add_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> 
@llvm.x86.avx512.mask.add.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_add_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_add_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_add_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_add_round_sd - // CHECK: @llvm.x86.avx512.mask.add.sd.round + // APPLE-LABEL: test_mm_mask_add_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_add_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_add_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_add_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_add_round_sd - // CHECK: @llvm.x86.avx512.mask.add.sd.round + // APPLE-LABEL: test_mm_maskz_add_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_add_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_add_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_add_sd - // CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fadd double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_add_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %add.i.i = fadd double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, double %add.i.i, double %0 + // APPLE-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // APPLE-NEXT: ret <2 x double> %4 + // X64-LABEL: test_mm_mask_add_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %add.i.i = fadd 
double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, double %add.i.i, double %0 + // X64-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // X64-NEXT: ret <2 x double> %4 return _mm_mask_add_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_add_sd - // CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fadd double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_add_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %add.i.i = fadd double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, double %add.i.i, double 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // APPLE-NEXT: ret <2 x double> %3 + // X64-LABEL: test_mm_maskz_add_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %add.i.i = fadd double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, double %add.i.i, double 0.000000e+00 + // X64-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // X64-NEXT: ret <2 x double> %3 return _mm_maskz_add_sd(__U,__A,__B); } __m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_sub_round_pd - // CHECK: @llvm.x86.avx512.sub.pd.512 + // APPLE-LABEL: test_mm512_sub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_sub_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_sub_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_sub_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_sub_round_pd - // CHECK: @llvm.x86.avx512.sub.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: 
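The non-round scalar forms checked above (mask/maskz_add_ss and mask/maskz_add_sd) are open-coded rather than lowered to the round intrinsic: element 0 is extracted from both sources, added, selected against bit 0 of the mask, and inserted back into __A, with the zero-masked variant selecting a literal 0.0 (the `0.000000e+00` in the checks). The net semantics match the model above; a minimal sketch of the double variant, with an illustrative helper name:

/* Model of _mm_maskz_add_sd(U, A, B): lane 0 is the sum when bit 0 of the
   mask is set and 0.0 otherwise; lane 1 is passed through from A. */
static void maskz_add_sd_model(unsigned char u, const double a[2],
                               const double b[2], double out[2]) {
  out[0] = (u & 1) ? a[0] + b[0] : 0.0;
  out[1] = a[1];
}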
test_mm512_mask_sub_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_sub_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_sub_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_round_pd - // CHECK: @llvm.x86.avx512.sub.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sub_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_sub_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_sub_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_sub_pd - // CHECK: fsub <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i.i = fsub <8 x double> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %sub.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_sub_pd + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <8 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %sub.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_sub_pd(__W,__U,__A,__B); } __m512d test_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_pd - // CHECK: fsub <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sub_pd + // APPLE: entry: + // APPLE-NEXT: %sub.i.i = fsub <8 x double> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %sub.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_sub_pd + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <8 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %sub.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_sub_pd(__U,__A,__B); } __m512 test_mm512_sub_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_sub_round_ps - // CHECK: @llvm.x86.avx512.sub.ps.512 + // APPLE-LABEL: test_mm512_sub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: ret <16 x 
float> %0 + // X64-LABEL: test_mm512_sub_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_sub_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_sub_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_sub_round_ps - // CHECK: @llvm.x86.avx512.sub.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_sub_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_sub_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_sub_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_round_ps - // CHECK: @llvm.x86.avx512.sub.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sub_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_sub_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_sub_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_sub_ps - // CHECK: fsub <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i.i = fsub <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %sub.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_sub_ps + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %sub.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_sub_ps(__W,__U,__A,__B); } __m512 test_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_ps - // CHECK: fsub <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: 
test_mm512_maskz_sub_ps + // APPLE: entry: + // APPLE-NEXT: %sub.i.i = fsub <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %sub.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_sub_ps + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %sub.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_sub_ps(__U,__A,__B); } __m128 test_mm_sub_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_sub_round_ss - // CHECK: @llvm.x86.avx512.mask.sub.ss.round + // APPLE-LABEL: test_mm_sub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_sub_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_sub_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_sub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_sub_round_ss - // CHECK: @llvm.x86.avx512.mask.sub.ss.round + // APPLE-LABEL: test_mm_mask_sub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_sub_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_sub_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_sub_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_sub_round_ss - // CHECK: @llvm.x86.avx512.mask.sub.ss.round + // APPLE-LABEL: test_mm_maskz_sub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_sub_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_sub_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_sub_ss - // CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fsub float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: 
test_mm_mask_sub_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, float %sub.i.i, float %0 + // APPLE-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // APPLE-NEXT: ret <4 x float> %4 + // X64-LABEL: test_mm_mask_sub_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, float %sub.i.i, float %0 + // X64-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // X64-NEXT: ret <4 x float> %4 return _mm_mask_sub_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_sub_ss - // CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fsub float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_sub_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, float %sub.i.i, float 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // APPLE-NEXT: ret <4 x float> %3 + // X64-LABEL: test_mm_maskz_sub_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, float %sub.i.i, float 0.000000e+00 + // X64-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // X64-NEXT: ret <4 x float> %3 return _mm_maskz_sub_ss(__U,__A,__B); } __m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_sub_round_sd - // CHECK: @llvm.x86.avx512.mask.sub.sd.round + // APPLE-LABEL: test_mm_sub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_sub_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x 
double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_sub_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_sub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_sub_round_sd - // CHECK: @llvm.x86.avx512.mask.sub.sd.round + // APPLE-LABEL: test_mm_mask_sub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_sub_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_sub_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_sub_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_sub_round_sd - // CHECK: @llvm.x86.avx512.mask.sub.sd.round + // APPLE-LABEL: test_mm_maskz_sub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_sub_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_sub_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_sub_sd - // CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fsub double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_sub_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, double %sub.i.i, double %0 + // APPLE-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // APPLE-NEXT: ret <2 x double> %4 + // X64-LABEL: test_mm_mask_sub_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, double %sub.i.i, double %0 + // X64-NEXT: %4 = 
insertelement <2 x double> %__A, double %3, i64 0 + // X64-NEXT: ret <2 x double> %4 return _mm_mask_sub_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_sub_sd - // CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fsub double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_sub_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, double %sub.i.i, double 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // APPLE-NEXT: ret <2 x double> %3 + // X64-LABEL: test_mm_maskz_sub_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, double %sub.i.i, double 0.000000e+00 + // X64-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // X64-NEXT: ret <2 x double> %3 return _mm_maskz_sub_sd(__U,__A,__B); } __m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mul_round_pd - // CHECK: @llvm.x86.avx512.mul.pd.512 + // APPLE-LABEL: test_mm512_mul_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mul_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_mul_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_mul_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_mul_round_pd - // CHECK: @llvm.x86.avx512.mul.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mul_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_mul_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // 
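For context, a small standalone usage sketch of the masked packed-double arithmetic exercised above. The function and variable names are illustrative, only intrinsics already covered by these tests are used, and it needs an AVX-512F-enabled target (for example -mavx512f).

#include <immintrin.h>

/* out[i] = a[i] - b[i] where bit i of k is set, otherwise out[i] = a[i].
   Uses _mm512_mask_sub_pd, whose IR shape (fsub + mask bitcast + select)
   is what the checks above pin down. */
static void masked_sub8(double *out, const double *a, const double *b,
                        __mmask8 k) {
  __m512d va = _mm512_loadu_pd(a);
  __m512d vb = _mm512_loadu_pd(b);
  __m512d r  = _mm512_mask_sub_pd(va, k, va, vb); /* fallback lanes keep a[i] */
  _mm512_storeu_pd(out, r);
}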
X64-NEXT: ret <8 x double> %2 return _mm512_mask_mul_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_mul_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_round_pd - // CHECK: @llvm.x86.avx512.mul.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mul_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_mul_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_mul_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_mul_pd - // CHECK: fmul <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mul_pd + // APPLE: entry: + // APPLE-NEXT: %mul.i.i = fmul <8 x double> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %mul.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_mul_pd + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <8 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %mul.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_mul_pd(__W,__U,__A,__B); } __m512d test_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_pd - // CHECK: fmul <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mul_pd + // APPLE: entry: + // APPLE-NEXT: %mul.i.i = fmul <8 x double> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %mul.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_mul_pd + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <8 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %mul.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_mul_pd(__U,__A,__B); } __m512 test_mm512_mul_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mul_round_ps - // CHECK: @llvm.x86.avx512.mul.ps.512 + // APPLE-LABEL: test_mm512_mul_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mul_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_mul_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC); } __m512 test_mm512_mask_mul_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_mul_round_ps - // CHECK: @llvm.x86.avx512.mul.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mul_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_mul_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_mul_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_mul_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_round_ps - // CHECK: @llvm.x86.avx512.mul.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mul_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_mul_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_mul_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_mul_ps - // CHECK: fmul <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mul_ps + // APPLE: entry: + // APPLE-NEXT: %mul.i.i = fmul <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %mul.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_mul_ps + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %mul.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_mul_ps(__W,__U,__A,__B); } __m512 test_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_ps - // CHECK: fmul <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mul_ps + // APPLE: entry: + // APPLE-NEXT: %mul.i.i = fmul <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %mul.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: 
test_mm512_maskz_mul_ps + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %mul.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_mul_ps(__U,__A,__B); } __m128 test_mm_mul_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mul_round_ss - // CHECK: @llvm.x86.avx512.mask.mul.ss.round + // APPLE-LABEL: test_mm_mul_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mul_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mul_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_mul_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_mul_round_ss - // CHECK: @llvm.x86.avx512.mask.mul.ss.round + // APPLE-LABEL: test_mm_mask_mul_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_mul_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_mul_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_mul_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_mul_round_ss - // CHECK: @llvm.x86.avx512.mask.mul.ss.round + // APPLE-LABEL: test_mm_maskz_mul_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_mul_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_mul_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_mul_ss - // CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fmul float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_mul_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 
+ // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, float %mul.i.i, float %0 + // APPLE-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // APPLE-NEXT: ret <4 x float> %4 + // X64-LABEL: test_mm_mask_mul_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, float %mul.i.i, float %0 + // X64-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // X64-NEXT: ret <4 x float> %4 return _mm_mask_mul_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_mul_ss - // CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fmul float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_mul_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, float %mul.i.i, float 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // APPLE-NEXT: ret <4 x float> %3 + // X64-LABEL: test_mm_maskz_mul_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, float %mul.i.i, float 0.000000e+00 + // X64-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // X64-NEXT: ret <4 x float> %3 return _mm_maskz_mul_ss(__U,__A,__B); } __m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mul_round_sd - // CHECK: @llvm.x86.avx512.mask.mul.sd.round + // APPLE-LABEL: test_mm_mul_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mul_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mul_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_mul_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_mul_round_sd 
- // CHECK: @llvm.x86.avx512.mask.mul.sd.round + // APPLE-LABEL: test_mm_mask_mul_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_mul_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_mul_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_mul_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_mul_round_sd - // CHECK: @llvm.x86.avx512.mask.mul.sd.round + // APPLE-LABEL: test_mm_maskz_mul_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_mul_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_mul_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_mul_sd - // CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fmul double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_mul_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, double %mul.i.i, double %0 + // APPLE-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // APPLE-NEXT: ret <2 x double> %4 + // X64-LABEL: test_mm_mask_mul_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, double %mul.i.i, double %0 + // X64-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // X64-NEXT: ret <2 x double> %4 return _mm_mask_mul_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_mul_sd - // CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round - // CHECK: extractelement 
<2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fmul double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_mul_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, double %mul.i.i, double 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // APPLE-NEXT: ret <2 x double> %3 + // X64-LABEL: test_mm_maskz_mul_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, double %mul.i.i, double 0.000000e+00 + // X64-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // X64-NEXT: ret <2 x double> %3 return _mm_maskz_mul_sd(__U,__A,__B); } __m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_div_round_pd - // CHECK: @llvm.x86.avx512.div.pd.512 + // APPLE-LABEL: test_mm512_div_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_div_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_div_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_div_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_div_round_pd - // CHECK: @llvm.x86.avx512.div.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_div_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_div_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_div_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_div_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_div_round_pd - // CHECK: @llvm.x86.avx512.div.pd.512 - // CHECK: select <8 x 
i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_div_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_div_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %__A, <8 x double> %__B, i32 8) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_div_pd(__m512d __a, __m512d __b) { - // CHECK-LABEL: @test_mm512_div_pd - // CHECK: fdiv <8 x double> + // APPLE-LABEL: test_mm512_div_pd + // APPLE: entry: + // APPLE-NEXT: %div.i = fdiv <8 x double> %__a, %__b + // APPLE-NEXT: ret <8 x double> %div.i + // X64-LABEL: test_mm512_div_pd + // X64: entry: + // X64-NEXT: %div.i = fdiv <8 x double> %__a, %__b + // X64-NEXT: ret <8 x double> %div.i return _mm512_div_pd(__a,__b); } __m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { - // CHECK-LABEL: @test_mm512_mask_div_pd - // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_div_pd + // APPLE: entry: + // APPLE-NEXT: %div.i.i = fdiv <8 x double> %__a, %__b + // APPLE-NEXT: %0 = bitcast i8 %__u to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %div.i.i, <8 x double> %__w + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_div_pd + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <8 x double> %__a, %__b + // X64-NEXT: %0 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %div.i.i, <8 x double> %__w + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_div_pd(__w,__u,__a,__b); } __m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_div_pd - // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_div_pd + // APPLE: entry: + // APPLE-NEXT: %div.i.i = fdiv <8 x double> %__A, %__B + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %div.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_div_pd + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <8 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %div.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_div_pd(__U,__A,__B); } __m512 test_mm512_div_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_div_round_ps - // CHECK: @llvm.x86.avx512.div.ps.512 + // APPLE-LABEL: test_mm512_div_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_div_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %__A, <16 x 
float> %__B, i32 8) + // X64-NEXT: ret <16 x float> %0 return _mm512_div_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_div_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_div_round_ps - // CHECK: @llvm.x86.avx512.div.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_div_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_div_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_div_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_div_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_div_round_ps - // CHECK: @llvm.x86.avx512.div.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_div_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_div_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %__A, <16 x float> %__B, i32 8) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_div_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_div_ps - // CHECK: fdiv <16 x float> + // APPLE-LABEL: test_mm512_div_ps + // APPLE: entry: + // APPLE-NEXT: %div.i = fdiv <16 x float> %__A, %__B + // APPLE-NEXT: ret <16 x float> %div.i + // X64-LABEL: test_mm512_div_ps + // X64: entry: + // X64-NEXT: %div.i = fdiv <16 x float> %__A, %__B + // X64-NEXT: ret <16 x float> %div.i return _mm512_div_ps(__A,__B); } __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_div_ps - // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_div_ps + // APPLE: entry: + // APPLE-NEXT: %div.i.i = fdiv <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %div.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_div_ps + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %div.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_div_ps(__W,__U,__A,__B); } 
__m512 test_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_div_ps - // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}} - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_div_ps + // APPLE: entry: + // APPLE-NEXT: %div.i.i = fdiv <16 x float> %__A, %__B + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %div.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_div_ps + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <16 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %div.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_div_ps(__U,__A,__B); } __m128 test_mm_div_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_div_round_ss - // CHECK: @llvm.x86.avx512.mask.div.ss.round + // APPLE-LABEL: test_mm_div_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_div_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_div_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_div_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_div_round_ss - // CHECK: @llvm.x86.avx512.mask.div.ss.round + // APPLE-LABEL: test_mm_mask_div_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_div_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_div_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_div_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_div_round_ss - // CHECK: @llvm.x86.avx512.mask.div.ss.round + // APPLE-LABEL: test_mm_maskz_div_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_div_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_div_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_div_ss - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fdiv float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to 
<8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_div_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %div.i.i = fdiv float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, float %div.i.i, float %0 + // APPLE-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // APPLE-NEXT: ret <4 x float> %4 + // X64-LABEL: test_mm_mask_div_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %div.i.i = fdiv float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, float %div.i.i, float %0 + // X64-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // X64-NEXT: ret <4 x float> %4 return _mm_mask_div_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_div_ss - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fdiv float %{{.*}}, %{{.*}} - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_div_ss + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // APPLE-NEXT: %div.i.i = fdiv float %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, float %div.i.i, float 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // APPLE-NEXT: ret <4 x float> %3 + // X64-LABEL: test_mm_maskz_div_ss + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <4 x float> %__A, i32 0 + // X64-NEXT: %div.i.i = fdiv float %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, float %div.i.i, float 0.000000e+00 + // X64-NEXT: %3 = insertelement <4 x float> %__A, float %2, i64 0 + // X64-NEXT: ret <4 x float> %3 return _mm_maskz_div_ss(__U,__A,__B); } __m128d test_mm_div_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_div_round_sd - // CHECK: @llvm.x86.avx512.mask.div.sd.round + // APPLE-LABEL: test_mm_div_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, 
i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_div_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_div_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_div_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_div_round_sd - // CHECK: @llvm.x86.avx512.mask.div.sd.round + // APPLE-LABEL: test_mm_mask_div_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_div_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_div_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_div_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_div_round_sd - // CHECK: @llvm.x86.avx512.mask.div.sd.round + // APPLE-LABEL: test_mm_maskz_div_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_div_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_div_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_div_sd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fdiv double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_div_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %div.i.i = fdiv double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, double %div.i.i, double %0 + // APPLE-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // APPLE-NEXT: ret <2 x double> %4 + // X64-LABEL: test_mm_mask_div_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %div.i.i = fdiv double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = bitcast i8 %__U to 
<8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, double %div.i.i, double %0 + // X64-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // X64-NEXT: ret <2 x double> %4 return _mm_mask_div_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_div_sd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fdiv double %{{.*}}, %{{.*}} - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_div_sd + // APPLE: entry: + // APPLE-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // APPLE-NEXT: %div.i.i = fdiv double %vecext1.i.i, %vecext.i.i + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // APPLE-NEXT: %2 = select i1 %1, double %div.i.i, double 0.000000e+00 + // APPLE-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // APPLE-NEXT: ret <2 x double> %3 + // X64-LABEL: test_mm_maskz_div_sd + // X64: entry: + // X64-NEXT: %vecext.i.i = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %vecext1.i.i = extractelement <2 x double> %__A, i32 0 + // X64-NEXT: %div.i.i = fdiv double %vecext1.i.i, %vecext.i.i + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = extractelement <8 x i1> %0, i64 0 + // X64-NEXT: %2 = select i1 %1, double %div.i.i, double 0.000000e+00 + // X64-NEXT: %3 = insertelement <2 x double> %__A, double %2, i64 0 + // X64-NEXT: ret <2 x double> %3 return _mm_maskz_div_sd(__U,__A,__B); } __m128 test_mm_max_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_max_round_ss - // CHECK: @llvm.x86.avx512.mask.max.ss.round + // APPLE-LABEL: test_mm_max_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_max_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_max_round_ss(__A,__B,0x08); } __m128 test_mm_mask_max_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_max_round_ss - // CHECK: @llvm.x86.avx512.mask.max.ss.round + // APPLE-LABEL: test_mm_mask_max_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_max_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_max_round_ss(__W,__U,__A,__B,0x08); } __m128 test_mm_maskz_max_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: 
@test_mm_maskz_max_round_ss - // CHECK: @llvm.x86.avx512.mask.max.ss.round + // APPLE-LABEL: test_mm_maskz_max_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_max_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_max_round_ss(__U,__A,__B,0x08); } __m128 test_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_max_ss - // CHECK: @llvm.x86.avx512.mask.max.ss.round + // APPLE-LABEL: test_mm_mask_max_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_max_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_max_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_max_ss - // CHECK: @llvm.x86.avx512.mask.max.ss.round + // APPLE-LABEL: test_mm_maskz_max_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_max_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_max_ss(__U,__A,__B); } __m128d test_mm_max_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_max_round_sd - // CHECK: @llvm.x86.avx512.mask.max.sd.round + // APPLE-LABEL: test_mm_max_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_max_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_max_round_sd(__A,__B,0x08); } __m128d test_mm_mask_max_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_max_round_sd - // CHECK: @llvm.x86.avx512.mask.max.sd.round + // APPLE-LABEL: test_mm_mask_max_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_max_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_max_round_sd(__W,__U,__A,__B,0x08); } __m128d test_mm_maskz_max_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_max_round_sd - // CHECK: @llvm.x86.avx512.mask.max.sd.round + // APPLE-LABEL: 
test_mm_maskz_max_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_max_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_max_round_sd(__U,__A,__B,0x08); } __m128d test_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_max_sd - // CHECK: @llvm.x86.avx512.mask.max.sd.round + // APPLE-LABEL: test_mm_mask_max_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_max_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_max_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_max_sd - // CHECK: @llvm.x86.avx512.mask.max.sd.round + // APPLE-LABEL: test_mm_maskz_max_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_max_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_max_sd(__U,__A,__B); } __m128 test_mm_min_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_min_round_ss - // CHECK: @llvm.x86.avx512.mask.min.ss.round + // APPLE-LABEL: test_mm_min_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_min_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_min_round_ss(__A,__B,0x08); } __m128 test_mm_mask_min_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_min_round_ss - // CHECK: @llvm.x86.avx512.mask.min.ss.round + // APPLE-LABEL: test_mm_mask_min_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_min_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_min_round_ss(__W,__U,__A,__B,0x08); } __m128 test_mm_maskz_min_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_min_round_ss - // CHECK: @llvm.x86.avx512.mask.min.ss.round + // APPLE-LABEL: test_mm_maskz_min_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x 
float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_min_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_min_round_ss(__U,__A,__B,0x08); } __m128 test_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_min_ss - // CHECK: @llvm.x86.avx512.mask.min.ss.round + // APPLE-LABEL: test_mm_mask_min_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_min_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_min_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_min_ss - // CHECK: @llvm.x86.avx512.mask.min.ss.round + // APPLE-LABEL: test_mm_maskz_min_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_min_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %__A, <4 x float> %__B, <4 x float> , i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_min_ss(__U,__A,__B); } __m128d test_mm_min_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_min_round_sd - // CHECK: @llvm.x86.avx512.mask.min.sd.round + // APPLE-LABEL: test_mm_min_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_min_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_min_round_sd(__A,__B,0x08); } __m128d test_mm_mask_min_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_min_round_sd - // CHECK: @llvm.x86.avx512.mask.min.sd.round + // APPLE-LABEL: test_mm_mask_min_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_min_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_min_round_sd(__W,__U,__A,__B,0x08); } __m128d test_mm_maskz_min_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_min_round_sd - // CHECK: @llvm.x86.avx512.mask.min.sd.round + // APPLE-LABEL: test_mm_maskz_min_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x 
double> , i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_min_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_min_round_sd(__U,__A,__B,0x08); } __m128d test_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_min_sd - // CHECK: @llvm.x86.avx512.mask.min.sd.round + // APPLE-LABEL: test_mm_mask_min_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_min_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_min_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_min_sd - // CHECK: @llvm.x86.avx512.mask.min.sd.round + // APPLE-LABEL: test_mm_maskz_min_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_min_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %__A, <2 x double> %__B, <2 x double> , i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_min_sd(__U,__A,__B); } __m512 test_mm512_undefined() { - // CHECK-LABEL: @test_mm512_undefined - // CHECK: ret <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_undefined + // APPLE: entry: + // APPLE-NEXT: ret <16 x float> zeroinitializer + // X64-LABEL: test_mm512_undefined + // X64: entry: + // X64-NEXT: ret <16 x float> zeroinitializer return _mm512_undefined(); } __m512 test_mm512_undefined_ps() { - // CHECK-LABEL: @test_mm512_undefined_ps - // CHECK: ret <16 x float> zeroinitializer + // APPLE-LABEL: test_mm512_undefined_ps + // APPLE: entry: + // APPLE-NEXT: ret <16 x float> zeroinitializer + // X64-LABEL: test_mm512_undefined_ps + // X64: entry: + // X64-NEXT: ret <16 x float> zeroinitializer return _mm512_undefined_ps(); } __m512d test_mm512_undefined_pd() { - // CHECK-LABEL: @test_mm512_undefined_pd - // CHECK: ret <8 x double> zeroinitializer + // APPLE-LABEL: test_mm512_undefined_pd + // APPLE: entry: + // APPLE-NEXT: ret <8 x double> zeroinitializer + // X64-LABEL: test_mm512_undefined_pd + // X64: entry: + // X64-NEXT: ret <8 x double> zeroinitializer return _mm512_undefined_pd(); } __m512i test_mm512_undefined_epi32() { - // CHECK-LABEL: @test_mm512_undefined_epi32 - // CHECK: ret <8 x i64> zeroinitializer + // APPLE-LABEL: test_mm512_undefined_epi32 + // APPLE: entry: + // APPLE-NEXT: ret <8 x i64> zeroinitializer + // X64-LABEL: test_mm512_undefined_epi32 + // X64: entry: + // X64-NEXT: ret <8 x i64> zeroinitializer return _mm512_undefined_epi32(); } __m512i test_mm512_cvtepi8_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepi8_epi32 - // CHECK: sext <16 x i8> %{{.*}} to <16 x i32> + // APPLE-LABEL: test_mm512_cvtepi8_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %conv.i = sext <16 x i8> %0 to <16 x i32> + // 
APPLE-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtepi8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %conv.i = sext <16 x i8> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvtepi8_epi32(__A); } __m512i test_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi8_epi32 - // CHECK: sext <16 x i8> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi8_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_cvtepi8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_cvtepi8_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi8_epi32 - // CHECK: sext <16 x i8> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi8_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_cvtepi8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_cvtepi8_epi32(__U, __A); } __m512i test_mm512_cvtepi8_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepi8_epi64 - // CHECK: sext <8 x i8> %{{.*}} to <8 x i64> + // APPLE-LABEL: test_mm512_cvtepi8_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // APPLE-NEXT: %conv.i = sext <8 x i8> %shuffle.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %conv.i + // X64-LABEL: test_mm512_cvtepi8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i = sext <8 x i8> %shuffle.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %conv.i return _mm512_cvtepi8_epi64(__A); } __m512i 
test_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi8_epi64 - // CHECK: sext <8 x i8> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi8_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // APPLE-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtepi8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtepi8_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi8_epi64 - // CHECK: sext <8 x i8> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi8_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // APPLE-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepi8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_cvtepi8_epi64(__U, __A); } __m512i test_mm512_cvtepi32_epi64(__m256i __X) { - // CHECK-LABEL: @test_mm512_cvtepi32_epi64 - // CHECK: sext <8 x i32> %{{.*}} to <8 x i64> + // APPLE-LABEL: test_mm512_cvtepi32_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // APPLE-NEXT: %conv.i = sext <8 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %conv.i + // X64-LABEL: test_mm512_cvtepi32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %conv.i = sext <8 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %conv.i return _mm512_cvtepi32_epi64(__X); } __m512i test_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi64 - // CHECK: sext <8 x i32> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi32_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // APPLE-NEXT: %conv.i.i = sext <8 x i32> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // 
X64-LABEL: test_mm512_mask_cvtepi32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %conv.i.i = sext <8 x i32> %0 to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtepi32_epi64(__W, __U, __X); } __m512i test_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi64 - // CHECK: sext <8 x i32> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi32_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // APPLE-NEXT: %conv.i.i = sext <8 x i32> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepi32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %conv.i.i = sext <8 x i32> %0 to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_cvtepi32_epi64(__U, __X); } __m512i test_mm512_cvtepi16_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepi16_epi32 - // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> + // APPLE-LABEL: test_mm512_cvtepi16_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %conv.i = sext <16 x i16> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtepi16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %conv.i = sext <16 x i16> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvtepi16_epi32(__A); } __m512i test_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi16_epi32 - // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi16_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %conv.i.i = sext <16 x i16> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_cvtepi16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %conv.i.i = sext <16 x i16> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_cvtepi16_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi16_epi32 - // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> 
%{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi16_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %conv.i.i = sext <16 x i16> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_cvtepi16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %conv.i.i = sext <16 x i16> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_cvtepi16_epi32(__U, __A); } __m512i test_mm512_cvtepi16_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepi16_epi64 - // CHECK: sext <8 x i16> %{{.*}} to <8 x i64> + // APPLE-LABEL: test_mm512_cvtepi16_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // APPLE-NEXT: %conv.i = sext <8 x i16> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %conv.i + // X64-LABEL: test_mm512_cvtepi16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i = sext <8 x i16> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %conv.i return _mm512_cvtepi16_epi64(__A); } __m512i test_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi16_epi64 - // CHECK: sext <8 x i16> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi16_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // APPLE-NEXT: %conv.i.i = sext <8 x i16> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtepi16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i = sext <8 x i16> %0 to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtepi16_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi16_epi64 - // CHECK: sext <8 x i16> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi16_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // APPLE-NEXT: %conv.i.i = sext <8 x i16> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepi16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i = sext <8 x i16> %0 to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_cvtepi16_epi64(__U, __A); } __m512i test_mm512_cvtepu8_epi32(__m128i __A) { - // 
CHECK-LABEL: @test_mm512_cvtepu8_epi32 - // CHECK: zext <16 x i8> %{{.*}} to <16 x i32> + // APPLE-LABEL: test_mm512_cvtepu8_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %conv.i = zext <16 x i8> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtepu8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %conv.i = zext <16 x i8> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvtepu8_epi32(__A); } __m512i test_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu8_epi32 - // CHECK: zext <16 x i8> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu8_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_cvtepu8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_cvtepu8_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu8_epi32 - // CHECK: zext <16 x i8> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepu8_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_cvtepu8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_cvtepu8_epi32(__U, __A); } __m512i test_mm512_cvtepu8_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepu8_epi64 - // CHECK: zext <8 x i8> %{{.*}} to <8 x i64> + // APPLE-LABEL: test_mm512_cvtepu8_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // APPLE-NEXT: %conv.i = zext <8 x i8> %shuffle.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %conv.i + // X64-LABEL: test_mm512_cvtepu8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A 
to <16 x i8> + // X64-NEXT: %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i = zext <8 x i8> %shuffle.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %conv.i return _mm512_cvtepu8_epi64(__A); } __m512i test_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu8_epi64 - // CHECK: zext <8 x i8> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu8_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // APPLE-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtepu8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtepu8_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu8_epi64 - // CHECK: zext <8 x i8> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepu8_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // APPLE-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepu8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_cvtepu8_epi64(__U, __A); } __m512i test_mm512_cvtepu32_epi64(__m256i __X) { - // CHECK-LABEL: @test_mm512_cvtepu32_epi64 - // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> + // APPLE-LABEL: test_mm512_cvtepu32_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // APPLE-NEXT: %conv.i = zext <8 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %conv.i + // X64-LABEL: test_mm512_cvtepu32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %conv.i = zext <8 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %conv.i return _mm512_cvtepu32_epi64(__X); } __m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { - // CHECK-LABEL: @test_mm512_mask_cvtepu32_epi64 - // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu32_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__X 
to <8 x i32> + // APPLE-NEXT: %conv.i.i = zext <8 x i32> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtepu32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %conv.i.i = zext <8 x i32> %0 to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtepu32_epi64(__W, __U, __X); } __m512i test_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu32_epi64 - // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepu32_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // APPLE-NEXT: %conv.i.i = zext <8 x i32> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepu32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %conv.i.i = zext <8 x i32> %0 to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_cvtepu32_epi64(__U, __X); } __m512i test_mm512_cvtepu16_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepu16_epi32 - // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> + // APPLE-LABEL: test_mm512_cvtepu16_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %conv.i = zext <16 x i16> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtepu16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %conv.i = zext <16 x i16> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <16 x i32> %conv.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvtepu16_epi32(__A); } __m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu16_epi32 - // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu16_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %conv.i.i = zext <16 x i16> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_cvtepu16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %conv.i.i = zext <16 x i16> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %conv.i.i, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return 
_mm512_mask_cvtepu16_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu16_epi32 - // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepu16_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %conv.i.i = zext <16 x i16> %0 to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_cvtepu16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %conv.i.i = zext <16 x i16> %0 to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %conv.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_cvtepu16_epi32(__U, __A); } __m512i test_mm512_cvtepu16_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepu16_epi64 - // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> + // APPLE-LABEL: test_mm512_cvtepu16_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // APPLE-NEXT: %conv.i = zext <8 x i16> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %conv.i + // X64-LABEL: test_mm512_cvtepu16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i = zext <8 x i16> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %conv.i return _mm512_cvtepu16_epi64(__A); } __m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu16_epi64 - // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu16_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // APPLE-NEXT: %conv.i.i = zext <8 x i16> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtepu16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i = zext <8 x i16> %0 to <8 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtepu16_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu16_epi64 - // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepu16_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // APPLE-NEXT: %conv.i.i = zext <8 x i16> %0 to <8 x i64> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepu16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i = zext <8 x i16> %0 to <8 x i64> + // 
X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %conv.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_cvtepu16_epi64(__U, __A); } __m512i test_mm512_rol_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_rol_epi32 - // CHECK: @llvm.fshl.v16i32 + // APPLE-LABEL: test_mm512_rol_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_rol_epi32(__A, 5); } __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_rol_epi32 - // CHECK: @llvm.fshl.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_rol_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_rol_epi32(__W, __U, __A, 5); } __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_rol_epi32 - // CHECK: @llvm.fshl.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_rol_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_rol_epi32(__U, __A, 5); } __m512i test_mm512_rol_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_rol_epi64 - // CHECK: @llvm.fshl.v8i64 + // APPLE-LABEL: test_mm512_rol_epi64 + // APPLE: 
entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // X64-NEXT: ret <8 x i64> %0 return _mm512_rol_epi64(__A, 5); } __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_rol_epi64 - // CHECK: @llvm.fshl.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_rol_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_rol_epi64(__W, __U, __A, 5); } __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_rol_epi64 - // CHECK: @llvm.fshl.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_rol_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_rol_epi64(__U, __A, 5); } __m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rolv_epi32 - // CHECK: @llvm.fshl.v16i32 + // APPLE-LABEL: test_mm512_rolv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_rolv_epi32(__A, __B); } __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rolv_epi32 - // CHECK: @llvm.fshl.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_rolv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 
x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_rolv_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rolv_epi32 - // CHECK: @llvm.fshl.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_rolv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_rolv_epi32(__U, __A, __B); } __m512i test_mm512_rolv_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rolv_epi64 - // CHECK: @llvm.fshl.v8i64 + // APPLE-LABEL: test_mm512_rolv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_rolv_epi64(__A, __B); } __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rolv_epi64 - // CHECK: @llvm.fshl.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_rolv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) 
#12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_rolv_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rolv_epi64 - // CHECK: @llvm.fshl.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_rolv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_rolv_epi64(__U, __A, __B); } __m512i test_mm512_ror_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_ror_epi32 - // CHECK: @llvm.fshr.v16i32 + // APPLE-LABEL: test_mm512_ror_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_ror_epi32(__A, 5); } __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_ror_epi32 - // CHECK: @llvm.fshr.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_ror_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_ror_epi32(__W, __U, __A, 5); } __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_ror_epi32 - // CHECK: @llvm.fshr.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_ror_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 
x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_ror_epi32(__U, __A, 5); } __m512i test_mm512_ror_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_ror_epi64 - // CHECK: @llvm.fshr.v8i64 + // APPLE-LABEL: test_mm512_ror_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // X64-NEXT: ret <8 x i64> %0 return _mm512_ror_epi64(__A, 5); } __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_ror_epi64 - // CHECK: @llvm.fshr.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_ror_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_ror_epi64(__W, __U, __A, 5); } __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_ror_epi64 - // CHECK: @llvm.fshr.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_ror_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_ror_epi64(__U, __A, 5); } __m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rorv_epi32 - // CHECK: @llvm.fshr.v16i32 + // APPLE-LABEL: test_mm512_rorv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // 
APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_rorv_epi32(__A, __B); } __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rorv_epi32 - // CHECK: @llvm.fshr.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_rorv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_rorv_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rorv_epi32 - // CHECK: @llvm.fshr.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_rorv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_rorv_epi32(__U, __A, __B); } __m512i test_mm512_rorv_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rorv_epi64 - // CHECK: @llvm.fshr.v8i64 + // APPLE-LABEL: test_mm512_rorv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: 
test_mm512_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_rorv_epi64(__A, __B); } __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rorv_epi64 - // CHECK: @llvm.fshr.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_rorv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_rorv_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rorv_epi64 - // CHECK: @llvm.fshr.v8i64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_rorv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_rorv_epi64(__U, __A, __B); } __m512i test_mm512_slli_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_slli_epi32 - // CHECK: @llvm.x86.avx512.pslli.d.512 + // APPLE-LABEL: test_mm512_slli_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = shl <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_slli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = shl <16 x i32> %0, + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_slli_epi32(__A, 5); } __m512i test_mm512_slli_epi32_2(__m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_slli_epi32_2 - // CHECK: @llvm.x86.avx512.pslli.d.512 + // APPLE-LABEL: test_mm512_slli_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_slli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_slli_epi32(__A, __B); } 
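+ // Note on the shift tests: the immediate-count forms above check for a plain IR shift (shl on <16 x i32> or <8 x i64>), while the variable-count _2 variants check for calls to @llvm.x86.avx512.pslli.d.512 and @llvm.x86.avx512.pslli.q.512; the srli tests below follow the same pattern with lshr and the @llvm.x86.avx512.psrli.* intrinsics, and the APPLE and X64 prefixes expect identical IR for each.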
__m512i test_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_slli_epi32 - // CHECK: @llvm.x86.avx512.pslli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_slli_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = shl <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_slli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = shl <16 x i32> %0, + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_slli_epi32(__W, __U, __A, 5); } __m512i test_mm512_mask_slli_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_mask_slli_epi32_2 - // CHECK: @llvm.x86.avx512.pslli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_slli_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_slli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_slli_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi32 - // CHECK: @llvm.x86.avx512.pslli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_slli_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = shl <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_slli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = shl <16 x i32> %0, + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_slli_epi32(__U, __A, 5); } __m512i test_mm512_maskz_slli_epi32_2(__mmask16 __U, __m512i __A, 
int __B) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi32_2 - // CHECK: @llvm.x86.avx512.pslli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_slli_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_slli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_slli_epi32(__U, __A, __B); } __m512i test_mm512_slli_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_slli_epi64 - // CHECK: @llvm.x86.avx512.pslli.q.512 + // APPLE-LABEL: test_mm512_slli_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = shl <8 x i64> %__A, + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_slli_epi64 + // X64: entry: + // X64-NEXT: %0 = shl <8 x i64> %__A, + // X64-NEXT: ret <8 x i64> %0 return _mm512_slli_epi64(__A, 5); } __m512i test_mm512_slli_epi64_2(__m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_slli_epi64_2 - // CHECK: @llvm.x86.avx512.pslli.q.512 + // APPLE-LABEL: test_mm512_slli_epi64_2 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %__A, i32 %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_slli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %__A, i32 %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_slli_epi64(__A, __B); } __m512i test_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_slli_epi64 - // CHECK: @llvm.x86.avx512.pslli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_slli_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = shl <8 x i64> %__A, + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_slli_epi64 + // X64: entry: + // X64-NEXT: %0 = shl <8 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_slli_epi64(__W, __U, __A, 5); } __m512i test_mm512_mask_slli_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_mask_slli_epi64_2 - // CHECK: @llvm.x86.avx512.pslli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_slli_epi64_2 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %__A, i32 %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_slli_epi64_2 + // X64: entry: + // 
X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %__A, i32 %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_slli_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi64 - // CHECK: @llvm.x86.avx512.pslli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_slli_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = shl <8 x i64> %__A, + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_slli_epi64 + // X64: entry: + // X64-NEXT: %0 = shl <8 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_slli_epi64(__U, __A, 5); } __m512i test_mm512_maskz_slli_epi64_2(__mmask8 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi64_2 - // CHECK: @llvm.x86.avx512.pslli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_slli_epi64_2 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %__A, i32 %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_slli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %__A, i32 %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_slli_epi64(__U, __A, __B); } __m512i test_mm512_srli_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_srli_epi32 - // CHECK: @llvm.x86.avx512.psrli.d.512 + // APPLE-LABEL: test_mm512_srli_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = lshr <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_srli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = lshr <16 x i32> %0, + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_srli_epi32(__A, 5); } __m512i test_mm512_srli_epi32_2(__m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_srli_epi32_2 - // CHECK: @llvm.x86.avx512.psrli.d.512 + // APPLE-LABEL: test_mm512_srli_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_srli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_srli_epi32(__A, __B); } __m512i test_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, 
__m512i __A) { - // CHECK-LABEL: @test_mm512_mask_srli_epi32 - // CHECK: @llvm.x86.avx512.psrli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srli_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = lshr <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_srli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = lshr <16 x i32> %0, + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_srli_epi32(__W, __U, __A, 5); } __m512i test_mm512_mask_srli_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_mask_srli_epi32_2 - // CHECK: @llvm.x86.avx512.psrli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srli_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_srli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_srli_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_srli_epi32 - // CHECK: @llvm.x86.avx512.psrli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srli_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = lshr <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_srli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = lshr <16 x i32> %0, + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_srli_epi32(__U, __A, 5); } __m512i test_mm512_maskz_srli_epi32_2(__mmask16 __U, __m512i __A, int __B) { - // CHECK-LABEL: 
@test_mm512_maskz_srli_epi32_2 - // CHECK: @llvm.x86.avx512.psrli.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srli_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_srli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_srli_epi32(__U, __A, __B); } __m512i test_mm512_srli_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_srli_epi64 - // CHECK: @llvm.x86.avx512.psrli.q.512 + // APPLE-LABEL: test_mm512_srli_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = lshr <8 x i64> %__A, + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_srli_epi64 + // X64: entry: + // X64-NEXT: %0 = lshr <8 x i64> %__A, + // X64-NEXT: ret <8 x i64> %0 return _mm512_srli_epi64(__A, 5); } __m512i test_mm512_srli_epi64_2(__m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_srli_epi64_2 - // CHECK: @llvm.x86.avx512.psrli.q.512 + // APPLE-LABEL: test_mm512_srli_epi64_2 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %__A, i32 %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_srli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %__A, i32 %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_srli_epi64(__A, __B); } __m512i test_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_srli_epi64 - // CHECK: @llvm.x86.avx512.psrli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srli_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = lshr <8 x i64> %__A, + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_srli_epi64 + // X64: entry: + // X64-NEXT: %0 = lshr <8 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_srli_epi64(__W, __U, __A, 5); } __m512i test_mm512_mask_srli_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_mask_srli_epi64_2 - // CHECK: @llvm.x86.avx512.psrli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srli_epi64_2 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %__A, i32 %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_srli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x 
i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %__A, i32 %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_srli_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_srli_epi64 - // CHECK: @llvm.x86.avx512.psrli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srli_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = lshr <8 x i64> %__A, + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_srli_epi64 + // X64: entry: + // X64-NEXT: %0 = lshr <8 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_srli_epi64(__U, __A, 5); } __m512i test_mm512_maskz_srli_epi64_2(__mmask8 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_maskz_srli_epi64_2 - // CHECK: @llvm.x86.avx512.psrli.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srli_epi64_2 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %__A, i32 %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_srli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %__A, i32 %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_srli_epi64(__U, __A, __B); } __m512i test_mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_load_epi32 - // CHECK: @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_load_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %0, i32 64, <16 x i1> %2, <16 x i32> %1) #12 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %0, i32 64, <16 x i1> %2, <16 x i32> %1) #12 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_load_epi32(__W, __U, __P); } __m512i test_mm512_maskz_load_epi32(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_epi32 - // CHECK: @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_load_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + 
// APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %0, i32 64, <16 x i1> %1, <16 x i32> zeroinitializer) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %0, i32 64, <16 x i1> %1, <16 x i32> zeroinitializer) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_load_epi32(__U, __P); } __m512i test_mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_mov_epi32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mov_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_mov_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_mov_epi32(__W, __U, __A); } __m512i test_mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_mov_epi32 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mov_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_mov_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_mov_epi32(__U, __A); } __m512i test_mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_mov_epi64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mov_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_mov_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_mov_epi64(__W, __U, __A); } __m512i test_mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_mov_epi64 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: 
test_mm512_maskz_mov_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_mov_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_mov_epi64(__U, __A); } __m512i test_mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_load_epi64 - // CHECK: @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_load_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 64, <8 x i1> %1, <8 x i64> %__W) #12 + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 64, <8 x i1> %1, <8 x i64> %__W) #12 + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_load_epi64(__W, __U, __P); } __m512i test_mm512_maskz_load_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_epi64 - // CHECK: @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_load_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 64, <8 x i1> %1, <8 x i64> zeroinitializer) #12 + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* %0, i32 64, <8 x i1> %1, <8 x i64> zeroinitializer) #12 + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_load_epi64(__U, __P); } void test_mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_store_epi32 - // CHECK: @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 64, <16 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_store_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + // APPLE-NEXT: %1 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %1, <16 x i32>* %0, i32 64, <16 x i1> %2) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_store_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <16 x i32>* + // X64-NEXT: %1 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %1, <16 x i32>* %0, i32 64, <16 x i1> %2) #12 + // X64-NEXT: ret void return _mm512_mask_store_epi32(__P, __U, __A); } void test_mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_store_epi64 - // CHECK: @llvm.masked.store.v8i64.p0v8i64(<8 
x i64> %{{.*}}, <8 x i64>* %{{.*}}, i32 64, <8 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_store_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: tail call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> %__A, <8 x i64>* %0, i32 64, <8 x i1> %1) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_store_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> %__A, <8 x i64>* %0, i32 64, <8 x i1> %1) #12 + // X64-NEXT: ret void return _mm512_mask_store_epi64(__P, __U, __A); } __m512d test_mm512_movedup_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_movedup_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_movedup_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x double> %__A, <8 x double> undef, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_movedup_pd + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x double> %__A, <8 x double> undef, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_movedup_pd(__A); } __m512d test_mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_movedup_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_movedup_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> undef, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_movedup_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_movedup_pd(__W, __U, __A); } __m512d test_mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_movedup_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_movedup_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> undef, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_movedup_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_movedup_pd(__U, __A); } int test_mm_comi_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_comi_round_sd - // CHECK: @llvm.x86.avx512.vcomi.sd + // APPLE-LABEL: test_mm_comi_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x 
double> %__A, <2 x double> %__B, i32 5, i32 8) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_comi_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %__A, <2 x double> %__B, i32 5, i32 8) + // X64-NEXT: ret i32 %0 return _mm_comi_round_sd(__A, __B, 5, _MM_FROUND_NO_EXC); } int test_mm_comi_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_comi_round_ss - // CHECK: @llvm.x86.avx512.vcomi.ss + // APPLE-LABEL: test_mm_comi_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %__A, <4 x float> %__B, i32 5, i32 8) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_comi_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %__A, <4 x float> %__B, i32 5, i32 8) + // X64-NEXT: ret i32 %0 return _mm_comi_round_ss(__A, __B, 5, _MM_FROUND_NO_EXC); } __m512d test_mm512_fixupimm_round_pd(__m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_round_pd - // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 + // APPLE-LABEL: test_mm512_fixupimm_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 -1, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fixupimm_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 -1, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_fixupimm_round_pd(__A, __B, __C, 5, 8); } __m512d test_mm512_mask_fixupimm_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_round_pd - // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 + // APPLE-LABEL: test_mm512_mask_fixupimm_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 %__U, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_fixupimm_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 %__U, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_fixupimm_round_pd(__A, __U, __B, __C, 5, 8); } __m512d test_mm512_fixupimm_pd(__m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_pd - // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 + // APPLE-LABEL: test_mm512_fixupimm_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 -1, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_fixupimm_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 -1, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_fixupimm_pd(__A, __B, __C, 5); } __m512d test_mm512_mask_fixupimm_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_pd - // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 + // APPLE-LABEL: test_mm512_mask_fixupimm_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, 
i32 5, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_fixupimm_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_fixupimm_pd(__A, __U, __B, __C, 5); } __m512d test_mm512_maskz_fixupimm_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_round_pd - // CHECK: @llvm.x86.avx512.maskz.fixupimm.pd.512 + // APPLE-LABEL: test_mm512_maskz_fixupimm_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 %__U, i32 8) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_fixupimm_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 %__U, i32 8) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_fixupimm_round_pd(__U, __A, __B, __C, 5, 8); } __m512d test_mm512_maskz_fixupimm_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_pd - // CHECK: @llvm.x86.avx512.maskz.fixupimm.pd.512 + // APPLE-LABEL: test_mm512_maskz_fixupimm_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_fixupimm_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x i64> %__C, i32 5, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_fixupimm_pd(__U, __A, __B, __C, 5); } __m512 test_mm512_fixupimm_round_ps(__m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_round_ps - // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 + // APPLE-LABEL: test_mm512_fixupimm_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 -1, i32 8) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_fixupimm_round_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 -1, i32 8) + // X64-NEXT: ret <16 x float> %1 return _mm512_fixupimm_round_ps(__A, __B, __C, 5, 8); } __m512 test_mm512_mask_fixupimm_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_round_ps - // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 + // APPLE-LABEL: test_mm512_mask_fixupimm_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 %__U, i32 8) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_fixupimm_round_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 
x float> %__B, <16 x i32> %0, i32 5, i16 %__U, i32 8) + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_fixupimm_round_ps(__A, __U, __B, __C, 5, 8); } __m512 test_mm512_fixupimm_ps(__m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_ps - // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 + // APPLE-LABEL: test_mm512_fixupimm_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 -1, i32 4) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_fixupimm_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 -1, i32 4) + // X64-NEXT: ret <16 x float> %1 return _mm512_fixupimm_ps(__A, __B, __C, 5); } __m512 test_mm512_mask_fixupimm_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_ps - // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 + // APPLE-LABEL: test_mm512_mask_fixupimm_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_fixupimm_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_fixupimm_ps(__A, __U, __B, __C, 5); } __m512 test_mm512_maskz_fixupimm_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_round_ps - // CHECK: @llvm.x86.avx512.maskz.fixupimm.ps.512 + // APPLE-LABEL: test_mm512_maskz_fixupimm_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 %__U, i32 8) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_fixupimm_round_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 %__U, i32 8) + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_fixupimm_round_ps(__U, __A, __B, __C, 5, 8); } __m512 test_mm512_maskz_fixupimm_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_ps - // CHECK: @llvm.x86.avx512.maskz.fixupimm.ps.512 + // APPLE-LABEL: test_mm512_maskz_fixupimm_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_fixupimm_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x i32> %0, i32 5, i16 %__U, 
i32 4) + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_fixupimm_ps(__U, __A, __B, __C, 5); } __m128d test_mm_fixupimm_round_sd(__m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_round_sd - // CHECK: @llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_fixupimm_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_fixupimm_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_fixupimm_round_sd(__A, __B, __C, 5, 8); } __m128d test_mm_mask_fixupimm_round_sd(__m128d __A, __mmask8 __U, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_round_sd - // CHECK: @llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_mask_fixupimm_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_fixupimm_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_fixupimm_round_sd(__A, __U, __B, __C, 5, 8); } __m128d test_mm_fixupimm_sd(__m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_sd - // CHECK: @llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_fixupimm_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 -1, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_fixupimm_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 -1, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_fixupimm_sd(__A, __B, __C, 5); } __m128d test_mm_mask_fixupimm_sd(__m128d __A, __mmask8 __U, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_sd - // CHECK: @llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_mask_fixupimm_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_fixupimm_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_fixupimm_sd(__A, __U, __B, __C, 5); } __m128d test_mm_maskz_fixupimm_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_round_sd - // CHECK: @llvm.x86.avx512.maskz.fixupimm + // APPLE-LABEL: test_mm_maskz_fixupimm_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_fixupimm_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> 
@llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_fixupimm_round_sd(__U, __A, __B, __C, 5, 8); } __m128d test_mm_maskz_fixupimm_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_sd - // CHECK: @llvm.x86.avx512.maskz.fixupimm + // APPLE-LABEL: test_mm_maskz_fixupimm_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_fixupimm_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_fixupimm_sd(__U, __A, __B, __C, 5); } __m128 test_mm_fixupimm_round_ss(__m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_round_ss - // CHECK: @llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_fixupimm_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // APPLE-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm_fixupimm_round_ss + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %1 return _mm_fixupimm_round_ss(__A, __B, __C, 5, 8); } __m128 test_mm_mask_fixupimm_round_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_round_ss - // CHECK: @llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_mask_fixupimm_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // APPLE-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm_mask_fixupimm_round_ss + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %1 return _mm_mask_fixupimm_round_ss(__A, __U, __B, __C, 5, 8); } __m128 test_mm_fixupimm_ss(__m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_ss - // CHECK: @llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_fixupimm_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // APPLE-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 -1, i32 4) + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm_fixupimm_ss + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 -1, i32 4) + // X64-NEXT: ret <4 x float> %1 return _mm_fixupimm_ss(__A, __B, __C, 5); } __m128 test_mm_mask_fixupimm_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_ss - // CHECK: 
@llvm.x86.avx512.mask.fixupimm + // APPLE-LABEL: test_mm_mask_fixupimm_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // APPLE-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm_mask_fixupimm_ss + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %1 return _mm_mask_fixupimm_ss(__A, __U, __B, __C, 5); } __m128 test_mm_maskz_fixupimm_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_round_ss - // CHECK: @llvm.x86.avx512.maskz.fixupimm + // APPLE-LABEL: test_mm_maskz_fixupimm_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // APPLE-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm_maskz_fixupimm_round_ss + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_fixupimm_round_ss(__U, __A, __B, __C, 5, 8); } __m128 test_mm_maskz_fixupimm_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_ss - // CHECK: @llvm.x86.avx512.maskz.fixupimm + // APPLE-LABEL: test_mm_maskz_fixupimm_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // APPLE-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %1 + // X64-LABEL: test_mm_maskz_fixupimm_ss + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_fixupimm_ss(__U, __A, __B, __C, 5); } __m128d test_mm_getexp_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getexp_round_sd - // CHECK: @llvm.x86.avx512.mask.getexp.sd + // APPLE-LABEL: test_mm_getexp_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_getexp_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_getexp_round_sd(__A, __B, 8); } __m128d test_mm_getexp_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getexp_sd - // CHECK: @llvm.x86.avx512.mask.getexp.sd + // APPLE-LABEL: test_mm_getexp_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_getexp_sd + // X64: entry: + // X64-NEXT: %0 = 
tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_getexp_sd(__A, __B); } __m128 test_mm_getexp_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getexp_round_ss - // CHECK: @llvm.x86.avx512.mask.getexp.ss + // APPLE-LABEL: test_mm_getexp_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_getexp_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_getexp_round_ss(__A, __B, 8); } __m128 test_mm_getexp_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getexp_ss - // CHECK: @llvm.x86.avx512.mask.getexp.ss + // APPLE-LABEL: test_mm_getexp_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_getexp_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_getexp_ss(__A, __B); } __m128d test_mm_getmant_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getmant_round_sd - // CHECK: @llvm.x86.avx512.mask.getmant.sd + // APPLE-LABEL: test_mm_getmant_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 0, <2 x double> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_getmant_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 0, <2 x double> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_getmant_round_sd(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, 8); } __m128d test_mm_getmant_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getmant_sd - // CHECK: @llvm.x86.avx512.mask.getmant.sd + // APPLE-LABEL: test_mm_getmant_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 0, <2 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_getmant_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 0, <2 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_getmant_sd(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src); } __m128 test_mm_getmant_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getmant_round_ss - // CHECK: @llvm.x86.avx512.mask.getmant.ss + // APPLE-LABEL: test_mm_getmant_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 0, <4 x float> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_getmant_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> 
@llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 0, <4 x float> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_getmant_round_ss(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, 8); } __m128 test_mm_getmant_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getmant_ss - // CHECK: @llvm.x86.avx512.mask.getmant.ss + // APPLE-LABEL: test_mm_getmant_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 0, <4 x float> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_getmant_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 0, <4 x float> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_getmant_ss(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src); } __mmask16 test_mm512_kmov(__mmask16 __A) { - // CHECK-LABEL: @test_mm512_kmov - // CHECK: load i16, i16* %__A.addr.i, align 2 + // APPLE-LABEL: test_mm512_kmov + // APPLE: entry: + // APPLE-NEXT: ret i16 %__A + // X64-LABEL: test_mm512_kmov + // X64: entry: + // X64-NEXT: ret i16 %__A return _mm512_kmov(__A); } __m512d test_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpackhi_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_unpackhi_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_unpackhi_pd(__W, __U, __A, __B); } #if __x86_64__ unsigned long long test_mm_cvt_roundsd_si64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_si64 - // CHECK: @llvm.x86.avx512.vcvtsd2si64 + // APPLE-LABEL: test_mm_cvt_roundsd_si64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvt_roundsd_si64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION); } #endif __m512i test_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask2_permutex2var_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + 
// APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %1 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask2_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %1 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask2_permutex2var_epi32(__A, __I, __U, __B); } __m512i test_mm512_unpackhi_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpackhi_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_unpackhi_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuffle.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast <16 x i32> %shuffle.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_unpackhi_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuffle.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast <16 x i32> %shuffle.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_unpackhi_epi32(__A, __B); } __m512d test_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpackhi_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_unpackhi_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_unpackhi_pd(__U, __A, __B); } #if __x86_64__ long long test_mm_cvt_roundsd_i64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_i64 - // CHECK: @llvm.x86.avx512.vcvtsd2si64 + // APPLE-LABEL: test_mm_cvt_roundsd_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvt_roundsd_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION); } #endif __m512d test_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 
x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask2_permutex2var_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // APPLE-NEXT: %1 = bitcast <8 x i64> %__I to <8 x double> + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x double> %0, <8 x double> %1 + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_mask2_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // X64-NEXT: %1 = bitcast <8 x i64> %__I to <8 x double> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x double> %0, <8 x double> %1 + // X64-NEXT: ret <8 x double> %3 return _mm512_mask2_permutex2var_pd(__A, __I, __U, __B); } __m512i test_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpackhi_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuffle.i.i, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_unpackhi_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuffle.i.i, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_unpackhi_epi32(__W, __U, __A, __B); } __m512 test_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpackhi_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_unpackhi_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_unpackhi_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_ps - // CHECK: 
shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpackhi_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_unpackhi_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_unpackhi_ps(__U, __A, __B); } __m512d test_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpacklo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_unpacklo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_unpacklo_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpacklo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_unpacklo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_unpacklo_pd(__U, __A, __B); } __m512 test_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpacklo_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // 
X64-LABEL: test_mm512_mask_unpacklo_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_unpacklo_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpacklo_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_unpacklo_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_unpacklo_ps(__U, __A, __B); } int test_mm_cvt_roundsd_si32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_si32 - // CHECK: @llvm.x86.avx512.vcvtsd2si32 + // APPLE-LABEL: test_mm_cvt_roundsd_si32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvt_roundsd_si32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundsd_i32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_i32 - // CHECK: @llvm.x86.avx512.vcvtsd2si32 + // APPLE-LABEL: test_mm_cvt_roundsd_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvt_roundsd_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvt_roundsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_u32 - // CHECK: @llvm.x86.avx512.vcvtsd2usi32 + // APPLE-LABEL: test_mm_cvt_roundsd_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvt_roundsd_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvtsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtsd_u32 - // CHECK: @llvm.x86.avx512.vcvtsd2usi32 + // APPLE-LABEL: test_mm_cvtsd_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %__A, i32 4) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtsd_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %__A, i32 4) #12 + // X64-NEXT: ret i32 %0 return _mm_cvtsd_u32(__A); } #ifdef __x86_64__ 
unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_u64 - // CHECK: @llvm.x86.avx512.vcvtsd2usi64 + // APPLE-LABEL: test_mm_cvt_roundsd_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvt_roundsd_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvtsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtsd_u64 - // CHECK: @llvm.x86.avx512.vcvtsd2usi64 + // APPLE-LABEL: test_mm_cvtsd_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %__A, i32 4) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtsd_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %__A, i32 4) #12 + // X64-NEXT: ret i64 %0 return _mm_cvtsd_u64(__A); } #endif int test_mm_cvt_roundss_si32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_si32 - // CHECK: @llvm.x86.avx512.vcvtss2si32 + // APPLE-LABEL: test_mm_cvt_roundss_si32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvt_roundss_si32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundss_i32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_i32 - // CHECK: @llvm.x86.avx512.vcvtss2si32 + // APPLE-LABEL: test_mm_cvt_roundss_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvt_roundss_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ long long test_mm_cvt_roundss_si64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_si64 - // CHECK: @llvm.x86.avx512.vcvtss2si64 + // APPLE-LABEL: test_mm_cvt_roundss_si64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvt_roundss_si64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvt_roundss_i64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_i64 - // CHECK: @llvm.x86.avx512.vcvtss2si64 + // APPLE-LABEL: test_mm_cvt_roundss_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvt_roundss_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); } #endif unsigned test_mm_cvt_roundss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_u32 - // CHECK: @llvm.x86.avx512.vcvtss2usi32 + // APPLE-LABEL: test_mm_cvt_roundss_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %__A, i32 4) 
+ // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvt_roundss_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvtss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtss_u32 - // CHECK: @llvm.x86.avx512.vcvtss2usi32 + // APPLE-LABEL: test_mm_cvtss_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %__A, i32 4) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtss_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %__A, i32 4) #12 + // X64-NEXT: ret i32 %0 return _mm_cvtss_u32(__A); } #ifdef __x86_64__ unsigned long long test_mm_cvt_roundss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_u64 - // CHECK: @llvm.x86.avx512.vcvtss2usi64 + // APPLE-LABEL: test_mm_cvt_roundss_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvt_roundss_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvtss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtss_u64 - // CHECK: @llvm.x86.avx512.vcvtss2usi64 + // APPLE-LABEL: test_mm_cvtss_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %__A, i32 4) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtss_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %__A, i32 4) #12 + // X64-NEXT: ret i64 %0 return _mm_cvtss_u64(__A); } #endif int test_mm_cvtt_roundsd_i32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_i32 - // CHECK: @llvm.x86.avx512.cvttsd2si + // APPLE-LABEL: test_mm_cvtt_roundsd_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtt_roundsd_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvtt_roundsd_si32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_si32 - // CHECK: @llvm.x86.avx512.cvttsd2si + // APPLE-LABEL: test_mm_cvtt_roundsd_si32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtt_roundsd_si32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvttsd_i32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_i32 - // CHECK: @llvm.x86.avx512.cvttsd2si + // APPLE-LABEL: test_mm_cvttsd_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %__A, i32 4) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvttsd_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %__A, i32 4) #12 + // X64-NEXT: ret i32 %0 return _mm_cvttsd_i32(__A); } #ifdef __x86_64__ unsigned long long test_mm_cvtt_roundsd_si64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_si64 - // CHECK: 
@llvm.x86.avx512.cvttsd2si64 + // APPLE-LABEL: test_mm_cvtt_roundsd_si64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtt_roundsd_si64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvtt_roundsd_i64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_i64 - // CHECK: @llvm.x86.avx512.cvttsd2si64 + // APPLE-LABEL: test_mm_cvtt_roundsd_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtt_roundsd_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvttsd_i64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_i64 - // CHECK: @llvm.x86.avx512.cvttsd2si64 + // APPLE-LABEL: test_mm_cvttsd_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %__A, i32 4) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvttsd_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %__A, i32 4) #12 + // X64-NEXT: ret i64 %0 return _mm_cvttsd_i64(__A); } #endif unsigned test_mm_cvtt_roundsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_u32 - // CHECK: @llvm.x86.avx512.cvttsd2usi + // APPLE-LABEL: test_mm_cvtt_roundsd_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtt_roundsd_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvttsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_u32 - // CHECK: @llvm.x86.avx512.cvttsd2usi + // APPLE-LABEL: test_mm_cvttsd_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %__A, i32 4) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvttsd_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %__A, i32 4) #12 + // X64-NEXT: ret i32 %0 return _mm_cvttsd_u32(__A); } #ifdef __x86_64__ unsigned long long test_mm_cvtt_roundsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_u64 - // CHECK: @llvm.x86.avx512.cvttsd2usi64 + // APPLE-LABEL: test_mm_cvtt_roundsd_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtt_roundsd_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvttsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_u64 - // CHECK: @llvm.x86.avx512.cvttsd2usi64 + // APPLE-LABEL: test_mm_cvttsd_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %__A, i32 4) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvttsd_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 
@llvm.x86.avx512.cvttsd2usi64(<2 x double> %__A, i32 4) #12 + // X64-NEXT: ret i64 %0 return _mm_cvttsd_u64(__A); } #endif int test_mm_cvtt_roundss_i32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_i32 - // CHECK: @llvm.x86.avx512.cvttss2si + // APPLE-LABEL: test_mm_cvtt_roundss_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtt_roundss_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvtt_roundss_si32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_si32 - // CHECK: @llvm.x86.avx512.cvttss2si + // APPLE-LABEL: test_mm_cvtt_roundss_si32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtt_roundss_si32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvttss_i32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttss_i32 - // CHECK: @llvm.x86.avx512.cvttss2si + // APPLE-LABEL: test_mm_cvttss_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %__A, i32 4) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvttss_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %__A, i32 4) #12 + // X64-NEXT: ret i32 %0 return _mm_cvttss_i32(__A); } #ifdef __x86_64__ float test_mm_cvtt_roundss_i64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_i64 - // CHECK: @llvm.x86.avx512.cvttss2si64 + // APPLE-LABEL: test_mm_cvtt_roundss_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %__A, i32 4) + // APPLE-NEXT: %conv = sitofp i64 %0 to float + // APPLE-NEXT: ret float %conv + // X64-LABEL: test_mm_cvtt_roundss_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %__A, i32 4) + // X64-NEXT: %conv = sitofp i64 %0 to float + // X64-NEXT: ret float %conv return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvtt_roundss_si64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_si64 - // CHECK: @llvm.x86.avx512.cvttss2si64 + // APPLE-LABEL: test_mm_cvtt_roundss_si64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtt_roundss_si64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvttss_i64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttss_i64 - // CHECK: @llvm.x86.avx512.cvttss2si64 + // APPLE-LABEL: test_mm_cvttss_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %__A, i32 4) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvttss_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %__A, i32 4) #12 + // X64-NEXT: ret i64 %0 return _mm_cvttss_i64(__A); } #endif unsigned test_mm_cvtt_roundss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_u32 - // CHECK: @llvm.x86.avx512.cvttss2usi + // 
APPLE-LABEL: test_mm_cvtt_roundss_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtt_roundss_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %__A, i32 4) + // X64-NEXT: ret i32 %0 return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvttss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttss_u32 - // CHECK: @llvm.x86.avx512.cvttss2usi + // APPLE-LABEL: test_mm_cvttss_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %__A, i32 4) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvttss_u32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %__A, i32 4) #12 + // X64-NEXT: ret i32 %0 return _mm_cvttss_u32(__A); } #ifdef __x86_64__ unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_u64 - // CHECK: @llvm.x86.avx512.cvttss2usi64 + // APPLE-LABEL: test_mm_cvtt_roundss_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %__A, i32 4) + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtt_roundss_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %__A, i32 4) + // X64-NEXT: ret i64 %0 return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvttss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttss_u64 - // CHECK: @llvm.x86.avx512.cvttss2usi64 + // APPLE-LABEL: test_mm_cvttss_u64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %__A, i32 4) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvttss_u64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %__A, i32 4) #12 + // X64-NEXT: ret i64 %0 return _mm_cvttss_u64(__A); } #endif __m512i test_mm512_cvtt_roundps_epu32(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvtt_roundps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 - return _mm512_cvtt_roundps_epu32(__A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_cvtt_roundps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtt_roundps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 + return _mm512_cvtt_roundps_epu32(__A, _MM_FROUND_CUR_DIRECTION); } __m512i test_mm512_mask_cvtt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 - return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_mask_cvtt_roundps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtt_roundps_epu32 + // 
X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 + return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m512i test_mm512_maskz_cvtt_roundps_epu32( __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvtt_roundps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtt_roundps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 - return _mm512_maskz_cvtt_roundps_epu32(__U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm512_maskz_cvtt_roundps_epu32(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m256i test_mm512_cvt_roundps_ph(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 - return _mm512_cvt_roundps_ph(__A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_cvt_roundps_ph + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %__A, i32 4, <16 x i16> zeroinitializer, i16 -1) + // APPLE-NEXT: %1 = bitcast <16 x i16> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvt_roundps_ph + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %__A, i32 4, <16 x i16> zeroinitializer, i16 -1) + // X64-NEXT: %1 = bitcast <16 x i16> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 + return _mm512_cvt_roundps_ph(__A, _MM_FROUND_CUR_DIRECTION); } __m256i test_mm512_mask_cvt_roundps_ph(__m256i __W , __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 - return _mm512_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_mask_cvt_roundps_ph + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__W to <16 x i16> + // APPLE-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %__A, i32 4, <16 x i16> %0, i16 %__U) + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvt_roundps_ph + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <16 x i16> + // X64-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %__A, i32 4, <16 x i16> %0, i16 %__U) + // X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 + return _mm512_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m256i test_mm512_maskz_cvt_roundps_ph(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 - return _mm512_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_maskz_cvt_roundps_ph + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i16> 
@llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %__A, i32 4, <16 x i16> zeroinitializer, i16 %__U) + // APPLE-NEXT: %1 = bitcast <16 x i16> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvt_roundps_ph + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %__A, i32 4, <16 x i16> zeroinitializer, i16 %__U) + // X64-NEXT: %1 = bitcast <16 x i16> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 + return _mm512_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_cvt_roundph_ps(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvt_roundph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 - return _mm512_cvt_roundph_ps(__A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_cvt_roundph_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> zeroinitializer, i16 -1, i32 4) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_cvt_roundph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> zeroinitializer, i16 -1, i32 4) + // X64-NEXT: ret <16 x float> %1 + return _mm512_cvt_roundph_ps(__A, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 - return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_mask_cvt_roundph_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> %__W, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_cvt_roundph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> %__W, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %1 + return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 - return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm512_maskz_cvt_roundph_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_cvt_roundph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %1 + return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_cvt_roundepi32_ps( __m512i __A) { - // CHECK-LABEL: @test_mm512_cvt_roundepi32_ps - // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32 + // APPLE-LABEL: test_mm512_cvt_roundepi32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // 
APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_cvt_roundepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // X64-NEXT: ret <16 x float> %1 return _mm512_cvt_roundepi32_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps - // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvt_roundepi32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask_cvt_roundepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W + // X64-NEXT: ret <16 x float> %3 return _mm512_mask_cvt_roundepi32_ps(__W,__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_cvt_roundepi32_ps(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi32_ps - // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvt_roundepi32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_maskz_cvt_roundepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %3 return _mm512_maskz_cvt_roundepi32_ps(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_cvt_roundepu32_ps(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvt_roundepu32_ps - // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32 + // APPLE-LABEL: test_mm512_cvt_roundepu32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_cvt_roundepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // X64-NEXT: ret <16 x float> %1 return 
_mm512_cvt_roundepu32_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_cvt_roundepu32_ps(__m512 __W, __mmask16 __U,__m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundepu32_ps - // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvt_roundepu32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask_cvt_roundepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W + // X64-NEXT: ret <16 x float> %3 return _mm512_mask_cvt_roundepu32_ps(__W,__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_cvt_roundepu32_ps(__mmask16 __U,__m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu32_ps - // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvt_roundepu32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_maskz_cvt_roundepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %0, i32 8) + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %3 return _mm512_maskz_cvt_roundepu32_ps(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256 test_mm512_cvt_roundpd_ps(__m512d A) { - // CHECK-LABEL: @test_mm512_cvt_roundpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + // APPLE-LABEL: test_mm512_cvt_roundpd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %A, <8 x float> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <8 x float> %0 + // X64-LABEL: test_mm512_cvt_roundpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %A, <8 x float> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <8 x float> %0 return _mm512_cvt_roundpd_ps(A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256 test_mm512_mask_cvt_roundpd_ps(__m256 W, __mmask8 U,__m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + // APPLE-LABEL: test_mm512_mask_cvt_roundpd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %A, <8 x float> %W, i8 %U, i32 8) + // APPLE-NEXT: ret <8 x float> %0 + // X64-LABEL: 
test_mm512_mask_cvt_roundpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %A, <8 x float> %W, i8 %U, i32 8) + // X64-NEXT: ret <8 x float> %0 return _mm512_mask_cvt_roundpd_ps(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256 test_mm512_maskz_cvt_roundpd_ps(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + // APPLE-LABEL: test_mm512_maskz_cvt_roundpd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %A, <8 x float> zeroinitializer, i8 %U, i32 8) + // APPLE-NEXT: ret <8 x float> %0 + // X64-LABEL: test_mm512_maskz_cvt_roundpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %A, <8 x float> zeroinitializer, i8 %U, i32 8) + // X64-NEXT: ret <8 x float> %0 return _mm512_maskz_cvt_roundpd_ps(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i test_mm512_cvtt_roundpd_epi32(__m512d A) { - // CHECK-LABEL: @test_mm512_cvtt_roundpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + // APPLE-LABEL: test_mm512_cvtt_roundpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvtt_roundpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvtt_roundpd_epi32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvtt_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + // APPLE-LABEL: test_mm512_mask_cvtt_roundpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %A, <8 x i32> %0, i8 %U, i32 8) + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtt_roundpd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %A, <8 x i32> %0, i8 %U, i32 8) + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvtt_roundpd_epi32(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i test_mm512_maskz_cvtt_roundpd_epi32(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvtt_roundpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 %U, i32 8) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtt_roundpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 %U, i32 8) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return 
_mm512_maskz_cvtt_roundpd_epi32(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_cvtt_roundps_epi32(__m512 A) { - // CHECK-LABEL: @test_mm512_cvtt_roundps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + // APPLE-LABEL: test_mm512_cvtt_roundps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %A, <16 x i32> zeroinitializer, i16 -1, i32 8) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtt_roundps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %A, <16 x i32> zeroinitializer, i16 -1, i32 8) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvtt_roundps_epi32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_mask_cvtt_roundps_epi32(__m512i W,__mmask16 U, __m512 A) { - // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + // APPLE-LABEL: test_mm512_mask_cvtt_roundps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %A, <16 x i32> %0, i16 %U, i32 8) + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtt_roundps_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %A, <16 x i32> %0, i16 %U, i32 8) + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtt_roundps_epi32(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_maskz_cvtt_roundps_epi32(__mmask16 U, __m512 A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvtt_roundps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %A, <16 x i32> zeroinitializer, i16 %U, i32 8) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtt_roundps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %A, <16 x i32> zeroinitializer, i16 %U, i32 8) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_cvtt_roundps_epi32(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_cvt_roundps_epi32(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + // APPLE-LABEL: test_mm512_cvt_roundps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 8) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvt_roundps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 8) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvt_roundps_epi32(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i 
test_mm512_mask_cvt_roundps_epi32(__m512i __W,__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + // APPLE-LABEL: test_mm512_mask_cvt_roundps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 8) + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvt_roundps_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 8) + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvt_roundps_epi32(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_maskz_cvt_roundps_epi32(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvt_roundps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 8) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvt_roundps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 8) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_cvt_roundps_epi32(__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i test_mm512_cvt_roundpd_epi32(__m512d A) { - // CHECK-LABEL: @test_mm512_cvt_roundpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + // APPLE-LABEL: test_mm512_cvt_roundpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvt_roundpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvt_roundpd_epi32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvt_roundpd_epi32(__m256i W,__mmask8 U,__m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + // APPLE-LABEL: test_mm512_mask_cvt_roundpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %A, <8 x i32> %0, i8 %U, i32 8) + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvt_roundpd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %A, <8 x i32> %0, i8 %U, i32 8) + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvt_roundpd_epi32(W,U,A,_MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC); } __m256i test_mm512_maskz_cvt_roundpd_epi32(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvt_roundpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 %U, i32 8) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvt_roundpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 %U, i32 8) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvt_roundpd_epi32(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_cvt_roundps_epu32(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + // APPLE-LABEL: test_mm512_cvt_roundps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 8) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvt_roundps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 8) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvt_roundps_epu32(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_mask_cvt_roundps_epu32(__m512i __W,__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + // APPLE-LABEL: test_mm512_mask_cvt_roundps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 8) + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvt_roundps_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 8) + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvt_roundps_epu32(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_maskz_cvt_roundps_epu32(__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvt_roundps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 8) + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvt_roundps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 8) + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_cvt_roundps_epu32(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i 
test_mm512_cvt_roundpd_epu32(__m512d A) { - // CHECK-LABEL: @test_mm512_cvt_roundpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + // APPLE-LABEL: test_mm512_cvt_roundpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvt_roundpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvt_roundpd_epu32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvt_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + // APPLE-LABEL: test_mm512_mask_cvt_roundpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %A, <8 x i32> %0, i8 %U, i32 8) + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvt_roundpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %A, <8 x i32> %0, i8 %U, i32 8) + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvt_roundpd_epu32(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m256i test_mm512_maskz_cvt_roundpd_epu32(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvt_roundpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 %U, i32 8) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvt_roundpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %A, <8 x i32> zeroinitializer, i8 %U, i32 8) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvt_roundpd_epu32(U, A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask2_permutex2var_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // APPLE-NEXT: %2 = bitcast <8 x i64> %__I to <16 x float> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2 + // APPLE-NEXT: ret <16 x float> %4 + // X64-LABEL: test_mm512_mask2_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> 
@llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // X64-NEXT: %2 = bitcast <8 x i64> %__I to <16 x float> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2 + // X64-NEXT: ret <16 x float> %4 return _mm512_mask2_permutex2var_ps(__A, __I, __U, __B); } __m512i test_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask2_permutex2var_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__I + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask2_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__I + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask2_permutex2var_epi64(__A, __I, __U, __B); } __m512d test_mm512_permute_pd(__m512d __X) { - // CHECK-LABEL: @test_mm512_permute_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + // APPLE-LABEL: test_mm512_permute_pd + // APPLE: entry: + // APPLE-NEXT: %permil = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // APPLE-NEXT: ret <8 x double> %permil + // X64-LABEL: test_mm512_permute_pd + // X64: entry: + // X64-NEXT: %permil = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // X64-NEXT: ret <8 x double> %permil return _mm512_permute_pd(__X, 2); } __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { - // CHECK-LABEL: @test_mm512_mask_permute_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permute_pd + // APPLE: entry: + // APPLE-NEXT: %permil = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %permil, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_permute_pd + // X64: entry: + // X64-NEXT: %permil = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %permil, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_permute_pd(__W, __U, __X, 2); } __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { - // CHECK-LABEL: @test_mm512_maskz_permute_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permute_pd + // APPLE: entry: + // APPLE-NEXT: %permil = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %permil, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // 
X64-LABEL: test_mm512_maskz_permute_pd + // X64: entry: + // X64-NEXT: %permil = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %permil, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_permute_pd(__U, __X, 2); } __m512 test_mm512_permute_ps(__m512 __X) { - // CHECK-LABEL: @test_mm512_permute_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> + // APPLE-LABEL: test_mm512_permute_ps + // APPLE: entry: + // APPLE-NEXT: %permil = shufflevector <16 x float> %__X, <16 x float> undef, <16 x i32> + // APPLE-NEXT: ret <16 x float> %permil + // X64-LABEL: test_mm512_permute_ps + // X64: entry: + // X64-NEXT: %permil = shufflevector <16 x float> %__X, <16 x float> undef, <16 x i32> + // X64-NEXT: ret <16 x float> %permil return _mm512_permute_ps(__X, 2); } __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { - // CHECK-LABEL: @test_mm512_mask_permute_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permute_ps + // APPLE: entry: + // APPLE-NEXT: %permil = shufflevector <16 x float> %__X, <16 x float> undef, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %permil, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_permute_ps + // X64: entry: + // X64-NEXT: %permil = shufflevector <16 x float> %__X, <16 x float> undef, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %permil, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_permute_ps(__W, __U, __X, 2); } __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { - // CHECK-LABEL: @test_mm512_maskz_permute_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permute_ps + // APPLE: entry: + // APPLE-NEXT: %permil = shufflevector <16 x float> %__X, <16 x float> undef, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %permil, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_permute_ps + // X64: entry: + // X64-NEXT: %permil = shufflevector <16 x float> %__X, <16 x float> undef, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %permil, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_permute_ps(__U, __X, 2); } __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_permutevar_pd - // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 + // APPLE-LABEL: test_mm512_permutevar_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %__A, <8 x i64> %__C) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_permutevar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %__A, <8 x i64> %__C) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_permutevar_pd(__A, __C); } __m512d 
test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_permutevar_pd - // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutevar_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %__A, <8 x i64> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_permutevar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %__A, <8 x i64> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_permutevar_pd(__W, __U, __A, __C); } __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_permutevar_pd - // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutevar_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %__A, <8 x i64> %__C) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_permutevar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %__A, <8 x i64> %__C) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_permutevar_pd(__U, __A, __C); } __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_permutevar_ps - // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 + // APPLE-LABEL: test_mm512_permutevar_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %__A, <16 x i32> %0) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_permutevar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %__A, <16 x i32> %0) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_permutevar_ps(__A, __C); } __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_permutevar_ps - // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutevar_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %__A, <16 x i32> %0) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask_permutevar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x 
i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %__A, <16 x i32> %0) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W + // X64-NEXT: ret <16 x float> %3 return _mm512_mask_permutevar_ps(__W, __U, __A, __C); } __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_permutevar_ps - // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutevar_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %__A, <16 x i32> %0) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_maskz_permutevar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %__A, <16 x i32> %0) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %3 return _mm512_maskz_permutevar_ps(__U, __A, __C); } __m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.512 + // APPLE-LABEL: test_mm512_permutex2var_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_permutex2var_epi32(__A, __I, __B); } __m512i test_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutex2var_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_permutex2var_epi32 + // X64: 
entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_permutex2var_epi32(__U, __A, __I, __B); } __m512i test_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutex2var_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) #12 + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_permutex2var_epi32 (__A,__U,__I,__B); } __m512d test_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B) { - // CHECK-LABEL: @test_mm512_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 + // APPLE-LABEL: test_mm512_permutex2var_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_permutex2var_pd (__A, __I,__B); } __m512d test_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutex2var_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> 
@llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_permutex2var_pd (__A,__U,__I,__B); } __m512d test_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutex2var_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_permutex2var_pd(__U, __A, __I, __B); } __m512 test_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B) { - // CHECK-LABEL: @test_mm512_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.512 + // APPLE-LABEL: test_mm512_permutex2var_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_permutex2var_ps (__A, __I, __B); } __m512 test_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutex2var_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__A + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_mask_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__A + // X64-NEXT: ret <16 x float> %3 return _mm512_mask_permutex2var_ps (__A,__U,__I,__B); } __m512 test_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_ps - // CHECK: 
@llvm.x86.avx512.vpermi2var.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutex2var_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_maskz_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__I to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %__A, <16 x i32> %0, <16 x float> %__B) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %3 return _mm512_maskz_permutex2var_ps(__U, __A, __I, __B); } __m512i test_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B){ - // CHECK-LABEL: @test_mm512_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.512 + // APPLE-LABEL: test_mm512_permutex2var_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_permutex2var_epi64(__A, __I, __B); } __m512i test_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, __m512i __B){ - // CHECK-LABEL: @test_mm512_mask_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutex2var_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_permutex2var_epi64(__A, __U, __I, __B); } __m512i test_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutex2var_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %__A, 
<8 x i64> %__I, <8 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_permutex2var_epi64(__U, __A, __I, __B); } __mmask16 test_mm512_testn_epi32_mask(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_testn_epi32_mask - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_testn_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // APPLE-NEXT: %1 = icmp eq <16 x i32> %0, zeroinitializer + // APPLE-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // APPLE-NEXT: ret i16 %2 + // X64-LABEL: test_mm512_testn_epi32_mask + // X64: entry: + // X64-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // X64-NEXT: %1 = icmp eq <16 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // X64-NEXT: ret i16 %2 return _mm512_testn_epi32_mask(__A, __B); } __mmask16 test_mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_testn_epi32_mask - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_testn_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // APPLE-NEXT: %1 = icmp eq <16 x i32> %0, zeroinitializer + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = and <16 x i1> %1, %2 + // APPLE-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // APPLE-NEXT: ret i16 %4 + // X64-LABEL: test_mm512_mask_testn_epi32_mask + // X64: entry: + // X64-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // X64-NEXT: %1 = icmp eq <16 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = and <16 x i1> %1, %2 + // X64-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // X64-NEXT: ret i16 %4 return _mm512_mask_testn_epi32_mask(__U, __A, __B); } __mmask8 test_mm512_testn_epi64_mask(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_testn_epi64_mask - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_testn_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer + // APPLE-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // APPLE-NEXT: ret i8 %1 + // X64-LABEL: test_mm512_testn_epi64_mask + // X64: entry: + // X64-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // X64-NEXT: %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm512_testn_epi64_mask(__A, __B); } __mmask8 test_mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_testn_epi64_mask - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_testn_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x 
i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_testn_epi64_mask + // X64: entry: + // X64-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // X64-NEXT: %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_testn_epi64_mask(__U, __A, __B); } __mmask16 test_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_test_epi32_mask - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_test_epi32_mask + // APPLE: entry: + // APPLE-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // APPLE-NEXT: %1 = icmp ne <16 x i32> %0, zeroinitializer + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = and <16 x i1> %1, %2 + // APPLE-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // APPLE-NEXT: ret i16 %4 + // X64-LABEL: test_mm512_mask_test_epi32_mask + // X64: entry: + // X64-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> + // X64-NEXT: %1 = icmp ne <16 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = and <16 x i1> %1, %2 + // X64-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // X64-NEXT: ret i16 %4 return _mm512_mask_test_epi32_mask (__U,__A,__B); } __mmask8 test_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_test_epi64_mask - // CHECK: and <16 x i32> %{{.*}}, %{{.*}} - // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // APPLE-LABEL: test_mm512_mask_test_epi64_mask + // APPLE: entry: + // APPLE-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // APPLE-NEXT: %0 = icmp ne <8 x i64> %and1.i.i, zeroinitializer + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = and <8 x i1> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // APPLE-NEXT: ret i8 %3 + // X64-LABEL: test_mm512_mask_test_epi64_mask + // X64: entry: + // X64-NEXT: %and1.i.i = and <8 x i64> %__B, %__A + // X64-NEXT: %0 = icmp ne <8 x i64> %and1.i.i, zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm512_mask_test_epi64_mask (__U,__A,__B); } __m512i test_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpackhi_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_unpackhi_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 
x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_unpackhi_epi32(__U, __A, __B); } __m512i test_mm512_unpackhi_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpackhi_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_unpackhi_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %shuffle.i + // X64-LABEL: test_mm512_unpackhi_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_unpackhi_epi64(__A, __B); } __m512i test_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpackhi_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_unpackhi_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_unpackhi_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpackhi_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_unpackhi_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_unpackhi_epi64(__U, __A, __B); } __m512i test_mm512_unpacklo_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpacklo_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_unpacklo_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuffle.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast <16 x i32> %shuffle.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // 
X64-LABEL: test_mm512_unpacklo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuffle.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast <16 x i32> %shuffle.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_unpacklo_epi32(__A, __B); } __m512i test_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpacklo_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuffle.i.i, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_unpacklo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuffle.i.i, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_unpacklo_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpacklo_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_unpacklo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_unpacklo_epi32(__U, __A, __B); } __m512i test_mm512_unpacklo_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpacklo_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_unpacklo_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: ret <8 x i64> 
%shuffle.i + // X64-LABEL: test_mm512_unpacklo_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_unpacklo_epi64(__A, __B); } __m512i test_mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_unpacklo_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_unpacklo_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_unpacklo_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_unpacklo_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_unpacklo_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_unpacklo_epi64(__U, __A, __B); } __m128d test_mm_roundscale_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_roundscale_round_sd - // CHECK: @llvm.x86.avx512.mask.rndscale.sd + // APPLE-LABEL: test_mm_roundscale_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 3, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_roundscale_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 3, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_roundscale_round_sd(__A, __B, 3, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_roundscale_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_roundscale_sd - // CHECK: @llvm.x86.avx512.mask.rndscale.sd + // APPLE-LABEL: test_mm_roundscale_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 3, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_roundscale_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x 
double> zeroinitializer, i8 -1, i32 3, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_roundscale_sd(__A, __B, 3); } __m128d test_mm_mask_roundscale_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK: @llvm.x86.avx512.mask.rndscale.sd - return _mm_mask_roundscale_sd(__W,__U,__A,__B,3); + // APPLE-LABEL: test_mm_mask_roundscale_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 3, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_roundscale_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 3, i32 4) + // X64-NEXT: ret <2 x double> %0 + return _mm_mask_roundscale_sd(__W, __U, __A, __B, 3); } __m128d test_mm_mask_roundscale_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK: @llvm.x86.avx512.mask.rndscale.sd - return _mm_mask_roundscale_round_sd(__W,__U,__A,__B,3,_MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm_mask_roundscale_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 3, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_roundscale_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 3, i32 4) + // X64-NEXT: ret <2 x double> %0 + return _mm_mask_roundscale_round_sd(__W, __U, __A, __B, 3, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_maskz_roundscale_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK: @llvm.x86.avx512.mask.rndscale.sd - return _mm_maskz_roundscale_sd(__U,__A,__B,3); + // APPLE-LABEL: test_mm_maskz_roundscale_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 3, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_roundscale_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 3, i32 4) + // X64-NEXT: ret <2 x double> %0 + return _mm_maskz_roundscale_sd(__U, __A, __B, 3); } __m128d test_mm_maskz_roundscale_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK: @llvm.x86.avx512.mask.rndscale.sd - return _mm_maskz_roundscale_round_sd(__U,__A,__B,3,_MM_FROUND_CUR_DIRECTION ); + // APPLE-LABEL: test_mm_maskz_roundscale_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 3, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_roundscale_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 3, i32 4) + // X64-NEXT: ret <2 x double> %0 + return _mm_maskz_roundscale_round_sd(__U, __A, __B, 3, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_roundscale_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_roundscale_round_ss - // CHECK: @llvm.x86.avx512.mask.rndscale.ss + // APPLE-LABEL: test_mm_roundscale_round_ss + // APPLE: entry: + // 
APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 3, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_roundscale_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 3, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_roundscale_round_ss(__A, __B, 3, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_roundscale_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_roundscale_ss - // CHECK: @llvm.x86.avx512.mask.rndscale.ss + // APPLE-LABEL: test_mm_roundscale_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 3, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_roundscale_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 3, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_roundscale_ss(__A, __B, 3); } __m128 test_mm_mask_roundscale_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_roundscale_ss - // CHECK: @llvm.x86.avx512.mask.rndscale.ss - return _mm_mask_roundscale_ss(__W,__U,__A,__B,3); + // APPLE-LABEL: test_mm_mask_roundscale_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 3, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_roundscale_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 3, i32 4) + // X64-NEXT: ret <4 x float> %0 + return _mm_mask_roundscale_ss(__W, __U, __A, __B, 3); } __m128 test_mm_maskz_roundscale_round_ss( __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_roundscale_round_ss - // CHECK: @llvm.x86.avx512.mask.rndscale.ss - return _mm_maskz_roundscale_round_ss(__U,__A,__B,3,_MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm_maskz_roundscale_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 3, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_roundscale_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 3, i32 4) + // X64-NEXT: ret <4 x float> %0 + return _mm_maskz_roundscale_round_ss(__U, __A, __B, 3, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_maskz_roundscale_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_roundscale_ss - // CHECK: @llvm.x86.avx512.mask.rndscale.ss - return _mm_maskz_roundscale_ss(__U,__A,__B,3); + // APPLE-LABEL: test_mm_maskz_roundscale_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 3, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_roundscale_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %__A, <4 x float> %__B, <4 x 
float> zeroinitializer, i8 %__U, i32 3, i32 4) + // X64-NEXT: ret <4 x float> %0 + return _mm_maskz_roundscale_ss(__U, __A, __B, 3); } __m512d test_mm512_scalef_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_scalef_round_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 + // APPLE-LABEL: test_mm512_scalef_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_scalef_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_scalef_round_pd(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_mask_scalef_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_round_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 + // APPLE-LABEL: test_mm512_mask_scalef_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_scalef_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_scalef_round_pd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_maskz_scalef_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_round_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 + // APPLE-LABEL: test_mm512_maskz_scalef_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_scalef_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_scalef_round_pd(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_scalef_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 + // APPLE-LABEL: test_mm512_scalef_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_scalef_pd(__A, __B); } __m512d test_mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 + // APPLE-LABEL: test_mm512_mask_scalef_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__W, i8 
%__U, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_scalef_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 + // APPLE-LABEL: test_mm512_maskz_scalef_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_scalef_pd(__U, __A, __B); } __m512 test_mm512_scalef_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_scalef_round_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 + // APPLE-LABEL: test_mm512_scalef_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 -1, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_scalef_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 -1, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_scalef_round_ps(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_mask_scalef_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_round_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 + // APPLE-LABEL: test_mm512_mask_scalef_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__W, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_scalef_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__W, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_scalef_round_ps(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_scalef_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_round_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 + // APPLE-LABEL: test_mm512_maskz_scalef_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_scalef_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_scalef_round_ps(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_scalef_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_scalef_ps - // CHECK: 
@llvm.x86.avx512.mask.scalef.ps.512 + // APPLE-LABEL: test_mm512_scalef_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 -1, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 -1, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_scalef_ps(__A, __B); } __m512 test_mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 + // APPLE-LABEL: test_mm512_mask_scalef_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__W, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__W, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_scalef_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 + // APPLE-LABEL: test_mm512_maskz_scalef_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> zeroinitializer, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_scalef_ps(__U, __A, __B); } __m128d test_mm_scalef_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_scalef_round_sd - // CHECK: @llvm.x86.avx512.mask.scalef.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %2, i8 -1, i32 8) + // APPLE-LABEL: test_mm_scalef_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_scalef_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_scalef_round_sd(__A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_scalef_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_scalef_sd - // CHECK: @llvm.x86.avx512.mask.scalef + // APPLE-LABEL: test_mm_scalef_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_scalef_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return 
_mm_scalef_sd(__A, __B); } __m128d test_mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_scalef_sd - // CHECK: @llvm.x86.avx512.mask.scalef.sd + // APPLE-LABEL: test_mm_mask_scalef_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_scalef_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_scalef_sd(__W, __U, __A, __B); } __m128d test_mm_mask_scalef_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_scalef_round_sd - // CHECK: @llvm.x86.avx512.mask.scalef.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8) - return _mm_mask_scalef_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + // APPLE-LABEL: test_mm_mask_scalef_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_scalef_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 + return _mm_mask_scalef_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_sd - // CHECK: @llvm.x86.avx512.mask.scalef.sd - return _mm_maskz_scalef_sd(__U, __A, __B); + // APPLE-LABEL: test_mm_maskz_scalef_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_scalef_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 + return _mm_maskz_scalef_sd(__U, __A, __B); } __m128d test_mm_maskz_scalef_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_round_sd - // CHECK: @llvm.x86.avx512.mask.scalef.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8) - return _mm_maskz_scalef_round_sd(__U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + // APPLE-LABEL: test_mm_maskz_scalef_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_scalef_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 + return _mm_maskz_scalef_round_sd(__U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_scalef_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: 
@test_mm_scalef_round_ss - // CHECK: @llvm.x86.avx512.mask.scalef.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8) + // APPLE-LABEL: test_mm_scalef_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_scalef_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_scalef_round_ss(__A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_scalef_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_scalef_ss - // CHECK: @llvm.x86.avx512.mask.scalef.ss + // APPLE-LABEL: test_mm_scalef_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_scalef_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_scalef_ss(__A, __B); } __m128 test_mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_scalef_ss - // CHECK: @llvm.x86.avx512.mask.scalef.ss - return _mm_mask_scalef_ss(__W, __U, __A, __B); + // APPLE-LABEL: test_mm_mask_scalef_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_scalef_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 + return _mm_mask_scalef_ss(__W, __U, __A, __B); } __m128 test_mm_mask_scalef_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_scalef_round_ss - // CHECK: @llvm.x86.avx512.mask.scalef.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8) - return _mm_mask_scalef_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + // APPLE-LABEL: test_mm_mask_scalef_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_scalef_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 + return _mm_mask_scalef_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_ss - // CHECK: @llvm.x86.avx512.mask.scalef.ss - return _mm_maskz_scalef_ss(__U, __A, __B); + // APPLE-LABEL: test_mm_maskz_scalef_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + 
// X64-LABEL: test_mm_maskz_scalef_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 + return _mm_maskz_scalef_ss(__U, __A, __B); } __m128 test_mm_maskz_scalef_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_round_ss - // CHECK: @llvm.x86.avx512.mask.scalef.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8) - return _mm_maskz_scalef_round_ss(__U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + // APPLE-LABEL: test_mm_maskz_scalef_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_scalef_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 + return _mm_maskz_scalef_round_ss(__U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512i test_mm512_srai_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_srai_epi32 - // CHECK: @llvm.x86.avx512.psrai.d.512 + // APPLE-LABEL: test_mm512_srai_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = ashr <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_srai_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = ashr <16 x i32> %0, + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_srai_epi32(__A, 5); } __m512i test_mm512_srai_epi32_2(__m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_srai_epi32_2 - // CHECK: @llvm.x86.avx512.psrai.d.512 + // APPLE-LABEL: test_mm512_srai_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_srai_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_srai_epi32(__A, __B); } __m512i test_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_srai_epi32 - // CHECK: @llvm.x86.avx512.psrai.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srai_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = ashr <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_srai_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = ashr <16 x i32> %0, + // X64-NEXT: %2 
= bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_srai_epi32(__W, __U, __A, 5); } __m512i test_mm512_mask_srai_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_mask_srai_epi32_2 - // CHECK: @llvm.x86.avx512.psrai.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srai_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_srai_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %0, i32 %__B) #12 + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_srai_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_srai_epi32 - // CHECK: @llvm.x86.avx512.psrai.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srai_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = ashr <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_srai_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = ashr <16 x i32> %0, + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_srai_epi32(__U, __A, 5); } __m512i test_mm512_maskz_srai_epi32_2(__mmask16 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_maskz_srai_epi32_2 - // CHECK: @llvm.x86.avx512.psrai.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srai_epi32_2 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %0, i32 %__B) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_srai_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %0, i32 %__B) #12 
+ // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1>
+ // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64>
+ // X64-NEXT: ret <8 x i64> %4
 return _mm512_maskz_srai_epi32(__U, __A, __B);
 }
 __m512i test_mm512_srai_epi64(__m512i __A) {
- // CHECK-LABEL: @test_mm512_srai_epi64
- // CHECK: @llvm.x86.avx512.psrai.q.512
+ // APPLE-LABEL: test_mm512_srai_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = ashr <8 x i64> %__A, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
+ // APPLE-NEXT: ret <8 x i64> %0
+ // X64-LABEL: test_mm512_srai_epi64
+ // X64: entry:
+ // X64-NEXT: %0 = ashr <8 x i64> %__A, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
+ // X64-NEXT: ret <8 x i64> %0
 return _mm512_srai_epi64(__A, 5);
 }
 __m512i test_mm512_srai_epi64_2(__m512i __A, int __B) {
- // CHECK-LABEL: @test_mm512_srai_epi64_2
- // CHECK: @llvm.x86.avx512.psrai.q.512
+ // APPLE-LABEL: test_mm512_srai_epi64_2
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %__A, i32 %__B) #12
+ // APPLE-NEXT: ret <8 x i64> %0
+ // X64-LABEL: test_mm512_srai_epi64_2
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %__A, i32 %__B) #12
+ // X64-NEXT: ret <8 x i64> %0
 return _mm512_srai_epi64(__A, __B);
 }
 __m512i test_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_srai_epi64
- // CHECK: @llvm.x86.avx512.psrai.q.512
- // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ // APPLE-LABEL: test_mm512_mask_srai_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = ashr <8 x i64> %__A, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
+ // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W
+ // APPLE-NEXT: ret <8 x i64> %2
+ // X64-LABEL: test_mm512_mask_srai_epi64
+ // X64: entry:
+ // X64-NEXT: %0 = ashr <8 x i64> %__A, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W
+ // X64-NEXT: ret <8 x i64> %2
 return _mm512_mask_srai_epi64(__W, __U, __A, 5);
 }
 __m512i test_mm512_mask_srai_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, int __B) {
- // CHECK-LABEL: @test_mm512_mask_srai_epi64_2
- // CHECK: @llvm.x86.avx512.psrai.q.512
- // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ // APPLE-LABEL: test_mm512_mask_srai_epi64_2
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %__A, i32 %__B) #12
+ // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W
+ // APPLE-NEXT: ret <8 x i64> %2
+ // X64-LABEL: test_mm512_mask_srai_epi64_2
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %__A, i32 %__B) #12
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W
+ // X64-NEXT: ret <8 x i64> %2
 return _mm512_mask_srai_epi64(__W, __U, __A, __B);
 }
 __m512i test_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_srai_epi64
- // CHECK: @llvm.x86.avx512.psrai.q.512
- // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ // APPLE-LABEL: test_mm512_maskz_srai_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = ashr <8 x i64> %__A, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
+ // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
+ // APPLE-NEXT: ret <8 x i64> %2
+ // X64-LABEL: test_mm512_maskz_srai_epi64
+ // X64:
entry: + // X64-NEXT: %0 = ashr <8 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_srai_epi64(__U, __A, 5); } __m512i test_mm512_maskz_srai_epi64_2(__mmask8 __U, __m512i __A, int __B) { - // CHECK-LABEL: @test_mm512_maskz_srai_epi64_2 - // CHECK: @llvm.x86.avx512.psrai.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srai_epi64_2 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %__A, i32 %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_srai_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %__A, i32 %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_srai_epi64(__U, __A, __B); } __m512i test_mm512_sll_epi32(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sll_epi32 - // CHECK: @llvm.x86.avx512.psll.d.512 + // APPLE-LABEL: test_mm512_sll_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_sll_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_sll_epi32(__A, __B); } __m512i test_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sll_epi32 - // CHECK: @llvm.x86.avx512.psll.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sll_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_sll_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_sll_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_sll_epi32(__mmask16 __U, 
__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sll_epi32 - // CHECK: @llvm.x86.avx512.psll.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sll_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_sll_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_sll_epi32(__U, __A, __B); } __m512i test_mm512_sll_epi64(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sll_epi64 - // CHECK: @llvm.x86.avx512.psll.q.512 + // APPLE-LABEL: test_mm512_sll_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_sll_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_sll_epi64(__A, __B); } __m512i test_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sll_epi64 - // CHECK: @llvm.x86.avx512.psll.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sll_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_sll_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_sll_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sll_epi64 - // CHECK: @llvm.x86.avx512.psll.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sll_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_sll_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = 
select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_sll_epi64(__U, __A, __B); } __m512i test_mm512_sllv_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_sllv_epi32 - // CHECK: @llvm.x86.avx512.psllv.d.512 + // APPLE-LABEL: test_mm512_sllv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_sllv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_sllv_epi32(__X, __Y); } __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_sllv_epi32 - // CHECK: @llvm.x86.avx512.psllv.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sllv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_sllv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_sllv_epi32(__W, __U, __X, __Y); } __m512i test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_sllv_epi32 - // CHECK: @llvm.x86.avx512.psllv.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sllv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_sllv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> 
+ // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_sllv_epi32(__U, __X, __Y); } __m512i test_mm512_sllv_epi64(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_sllv_epi64 - // CHECK: @llvm.x86.avx512.psllv.q.512 + // APPLE-LABEL: test_mm512_sllv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_sllv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_sllv_epi64(__X, __Y); } __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_sllv_epi64 - // CHECK: @llvm.x86.avx512.psllv.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sllv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_sllv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_sllv_epi64(__W, __U, __X, __Y); } __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_sllv_epi64 - // CHECK: @llvm.x86.avx512.psllv.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sllv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_sllv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_sllv_epi64(__U, __X, __Y); } __m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sra_epi32 - // CHECK: @llvm.x86.avx512.psra.d.512 + // APPLE-LABEL: test_mm512_sra_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_sra_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 
return _mm512_sra_epi32(__A, __B); } __m512i test_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sra_epi32 - // CHECK: @llvm.x86.avx512.psra.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_sra_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_sra_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_sra_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sra_epi32 - // CHECK: @llvm.x86.avx512.psra.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sra_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_sra_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_sra_epi32(__U, __A, __B); } __m512i test_mm512_sra_epi64(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.512 + // APPLE-LABEL: test_mm512_sra_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_sra_epi64(__A, __B); } __m512i test_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // 
APPLE-LABEL: test_mm512_mask_sra_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_sra_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_sra_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_sra_epi64(__U, __A, __B); } __m512i test_mm512_srav_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srav_epi32 - // CHECK: @llvm.x86.avx512.psrav.d.512 + // APPLE-LABEL: test_mm512_srav_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_srav_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_srav_epi32(__X, __Y); } __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srav_epi32 - // CHECK: @llvm.x86.avx512.psrav.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srav_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_srav_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = 
tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_srav_epi32(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srav_epi32 - // CHECK: @llvm.x86.avx512.psrav.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srav_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_srav_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_srav_epi32(__U, __X, __Y); } __m512i test_mm512_srav_epi64(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srav_epi64 - // CHECK: @llvm.x86.avx512.psrav.q.512 + // APPLE-LABEL: test_mm512_srav_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_srav_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_srav_epi64(__X, __Y); } __m512i test_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srav_epi64 - // CHECK: @llvm.x86.avx512.psrav.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srav_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_srav_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_srav_epi64(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srav_epi64 - // CHECK: @llvm.x86.avx512.psrav.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srav_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = 
tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_srav_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_srav_epi64(__U, __X, __Y); } __m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_srl_epi32 - // CHECK: @llvm.x86.avx512.psrl.d.512 + // APPLE-LABEL: test_mm512_srl_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_srl_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_srl_epi32(__A, __B); } __m512i test_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_srl_epi32 - // CHECK: @llvm.x86.avx512.psrl.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srl_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_srl_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_srl_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_srl_epi32 - // CHECK: @llvm.x86.avx512.psrl.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srl_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x 
i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_srl_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %0, <4 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_srl_epi32(__U, __A, __B); } __m512i test_mm512_srl_epi64(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_srl_epi64 - // CHECK: @llvm.x86.avx512.psrl.q.512 + // APPLE-LABEL: test_mm512_srl_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_srl_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_srl_epi64(__A, __B); } __m512i test_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_srl_epi64 - // CHECK: @llvm.x86.avx512.psrl.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srl_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_srl_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_srl_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_srl_epi64 - // CHECK: @llvm.x86.avx512.psrl.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srl_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_srl_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %__A, <2 x i64> %__B) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_srl_epi64(__U, __A, __B); } __m512i test_mm512_srlv_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srlv_epi32 - // CHECK: @llvm.x86.avx512.psrlv.d.512 + // APPLE-LABEL: test_mm512_srlv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: 
%3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_srlv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_srlv_epi32(__X, __Y); } __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srlv_epi32 - // CHECK: @llvm.x86.avx512.psrlv.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srlv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_srlv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_srlv_epi32(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srlv_epi32 - // CHECK: @llvm.x86.avx512.psrlv.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srlv_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_srlv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_srlv_epi32(__U, __X, __Y); } __m512i test_mm512_srlv_epi64(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srlv_epi64 - // CHECK: @llvm.x86.avx512.psrlv.q.512 + // APPLE-LABEL: test_mm512_srlv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: 
test_mm512_srlv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: ret <8 x i64> %0 return _mm512_srlv_epi64(__X, __Y); } __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srlv_epi64 - // CHECK: @llvm.x86.avx512.psrlv.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_srlv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_srlv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_srlv_epi64(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srlv_epi64 - // CHECK: @llvm.x86.avx512.psrlv.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_srlv_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_srlv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %__X, <8 x i64> %__Y) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_srlv_epi64(__U, __X, __Y); } __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.512 + // APPLE-LABEL: test_mm512_ternarylogic_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4) + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_ternarylogic_epi32(__A, __B, __C, 4); } __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: 
test_mm512_mask_ternarylogic_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4) + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_ternarylogic_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4) + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.512 + // APPLE-LABEL: test_mm512_ternarylogic_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4) + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4) + // X64-NEXT: ret <8 x i64> %0 return _mm512_ternarylogic_epi64(__A, __B, __C, 4); } __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { - // CHECK-LABEL: 
@test_mm512_mask_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_ternarylogic_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer + // APPLE-LABEL: test_mm512_maskz_ternarylogic_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_shuffle_f32x4 - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_shuffle_f32x4 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuf + // X64-LABEL: test_mm512_shuffle_f32x4 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // X64-NEXT: ret <16 x float> %shuf return _mm512_shuffle_f32x4(__A, __B, 4); } __m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_f32x4 - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_shuffle_f32x4 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuf, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_shuffle_f32x4 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuf, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return 
_mm512_mask_shuffle_f32x4(__W, __U, __A, __B, 4); } __m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_f32x4 - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_shuffle_f32x4 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuf, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_shuffle_f32x4 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuf, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_shuffle_f32x4(__U, __A, __B, 4); } __m512d test_mm512_shuffle_f64x2(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_shuffle_f64x2 - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_shuffle_f64x2 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuf + // X64-LABEL: test_mm512_shuffle_f64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // X64-NEXT: ret <8 x double> %shuf return _mm512_shuffle_f64x2(__A, __B, 4); } __m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_f64x2 - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_shuffle_f64x2 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuf, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_shuffle_f64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuf, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, 4); } __m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_f64x2 - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_shuffle_f64x2 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuf, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_shuffle_f64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuf, <8 x double> 
zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_shuffle_f64x2(__U, __A, __B, 4); } __m512i test_mm512_shuffle_i32x4(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_shuffle_i32x4 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_shuffle_i32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuf = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast <16 x i32> %shuf to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_shuffle_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuf = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast <16 x i32> %shuf to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_shuffle_i32x4(__A, __B, 4); } __m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_i32x4 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_shuffle_i32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuf = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuf, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_shuffle_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuf = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuf, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, 4); } __m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_i32x4 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_shuffle_i32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %shuf = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuf, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_shuffle_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %shuf = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = 
select <16 x i1> %2, <16 x i32> %shuf, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_shuffle_i32x4(__U, __A, __B, 4); } __m512i test_mm512_shuffle_i64x2(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_shuffle_i64x2 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_shuffle_i64x2 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %shuf + // X64-LABEL: test_mm512_shuffle_i64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuf return _mm512_shuffle_i64x2(__A, __B, 4); } __m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_i64x2 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_shuffle_i64x2 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuf, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_shuffle_i64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuf, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, 4); } __m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_i64x2 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_shuffle_i64x2 + // APPLE: entry: + // APPLE-NEXT: %shuf = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuf, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_shuffle_i64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuf, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_shuffle_i64x2(__U, __A, __B, 4); } __m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) { - // CHECK-LABEL: @test_mm512_shuffle_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_shuffle_pd + // APPLE: entry: + // APPLE-NEXT: %shufp = shufflevector <8 x double> %__M, <8 x double> %__V, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shufp + // X64-LABEL: test_mm512_shuffle_pd + // X64: entry: + // X64-NEXT: %shufp = shufflevector <8 x double> %__M, <8 x double> %__V, <8 x i32> + // X64-NEXT: ret <8 x double> %shufp return _mm512_shuffle_pd(__M, __V, 4); } __m512d test_mm512_mask_shuffle_pd(__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) { - // CHECK-LABEL: @test_mm512_mask_shuffle_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, 
<8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_shuffle_pd + // APPLE: entry: + // APPLE-NEXT: %shufp = shufflevector <8 x double> %__M, <8 x double> %__V, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shufp, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_shuffle_pd + // X64: entry: + // X64-NEXT: %shufp = shufflevector <8 x double> %__M, <8 x double> %__V, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shufp, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_shuffle_pd(__W, __U, __M, __V, 4); } __m512d test_mm512_maskz_shuffle_pd(__mmask8 __U, __m512d __M, __m512d __V) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_shuffle_pd + // APPLE: entry: + // APPLE-NEXT: %shufp = shufflevector <8 x double> %__M, <8 x double> %__V, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shufp, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_shuffle_pd + // X64: entry: + // X64-NEXT: %shufp = shufflevector <8 x double> %__M, <8 x double> %__V, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shufp, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_shuffle_pd(__U, __M, __V, 4); } __m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) { - // CHECK-LABEL: @test_mm512_shuffle_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_shuffle_ps + // APPLE: entry: + // APPLE-NEXT: %shufp = shufflevector <16 x float> %__M, <16 x float> %__V, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shufp + // X64-LABEL: test_mm512_shuffle_ps + // X64: entry: + // X64-NEXT: %shufp = shufflevector <16 x float> %__M, <16 x float> %__V, <16 x i32> + // X64-NEXT: ret <16 x float> %shufp return _mm512_shuffle_ps(__M, __V, 4); } __m512 test_mm512_mask_shuffle_ps(__m512 __W, __mmask16 __U, __m512 __M, __m512 __V) { - // CHECK-LABEL: @test_mm512_mask_shuffle_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_shuffle_ps + // APPLE: entry: + // APPLE-NEXT: %shufp = shufflevector <16 x float> %__M, <16 x float> %__V, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shufp, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_shuffle_ps + // X64: entry: + // X64-NEXT: %shufp = shufflevector <16 x float> %__M, <16 x float> %__V, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shufp, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_shuffle_ps(__W, __U, __M, __V, 4); } __m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} 
+ // APPLE-LABEL: test_mm512_maskz_shuffle_ps + // APPLE: entry: + // APPLE-NEXT: %shufp = shufflevector <16 x float> %__M, <16 x float> %__V, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shufp, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_shuffle_ps + // X64: entry: + // X64-NEXT: %shufp = shufflevector <16 x float> %__M, <16 x float> %__V, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shufp, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_shuffle_ps(__U, __M, __V, 4); } __m128d test_mm_sqrt_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_sqrt_round_sd - // CHECK: call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8) + // APPLE-LABEL: test_mm_sqrt_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_sqrt_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_sqrt_round_sd(__A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_sd - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: call double @llvm.sqrt.f64(double %{{.*}}) - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 {{.*}}, double {{.*}}, double {{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double {{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_sqrt_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %1 = tail call double @llvm.sqrt.f64(double %0) #12 + // APPLE-NEXT: %2 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %4 = extractelement <8 x i1> %3, i64 0 + // APPLE-NEXT: %5 = select i1 %4, double %1, double %2 + // APPLE-NEXT: %6 = insertelement <2 x double> %__A, double %5, i64 0 + // APPLE-NEXT: ret <2 x double> %6 + // X64-LABEL: test_mm_mask_sqrt_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %1 = tail call double @llvm.sqrt.f64(double %0) #12 + // X64-NEXT: %2 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = extractelement <8 x i1> %3, i64 0 + // X64-NEXT: %5 = select i1 %4, double %1, double %2 + // X64-NEXT: %6 = insertelement <2 x double> %__A, double %5, i64 0 + // X64-NEXT: ret <2 x double> %6 return _mm_mask_sqrt_sd(__W,__U,__A,__B); } __m128d test_mm_mask_sqrt_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_round_sd - // CHECK: call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8) + // APPLE-LABEL: test_mm_mask_sqrt_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> 
@llvm.x86.avx512.mask.sqrt.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_sqrt_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_sqrt_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_sd - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: call double @llvm.sqrt.f64(double %{{.*}}) - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 {{.*}}, double {{.*}}, double {{.*}} - // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double {{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_sqrt_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %1 = tail call double @llvm.sqrt.f64(double %0) #12 + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // APPLE-NEXT: %4 = select i1 %3, double %1, double 0.000000e+00 + // APPLE-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // APPLE-NEXT: ret <2 x double> %5 + // X64-LABEL: test_mm_maskz_sqrt_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %1 = tail call double @llvm.sqrt.f64(double %0) #12 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // X64-NEXT: %4 = select i1 %3, double %1, double 0.000000e+00 + // X64-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // X64-NEXT: ret <2 x double> %5 return _mm_maskz_sqrt_sd(__U,__A,__B); } __m128d test_mm_maskz_sqrt_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_round_sd - // CHECK: call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8) + // APPLE-LABEL: test_mm_maskz_sqrt_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 8) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_sqrt_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 8) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_sqrt_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_sqrt_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_sqrt_round_ss - // CHECK: call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8) + // APPLE-LABEL: test_mm_sqrt_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_sqrt_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1, i32 8) + // X64-NEXT: ret 
<4 x float> %0 return _mm_sqrt_round_ss(__A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_ss - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: call float @llvm.sqrt.f32(float %{{.*}}) - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 {{.*}}, float {{.*}}, float {{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float {{.*}}, i64 0 + // APPLE-LABEL: test_mm_mask_sqrt_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %1 = tail call float @llvm.sqrt.f32(float %0) #12 + // APPLE-NEXT: %2 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %4 = extractelement <8 x i1> %3, i64 0 + // APPLE-NEXT: %5 = select i1 %4, float %1, float %2 + // APPLE-NEXT: %6 = insertelement <4 x float> %__A, float %5, i64 0 + // APPLE-NEXT: ret <4 x float> %6 + // X64-LABEL: test_mm_mask_sqrt_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %1 = tail call float @llvm.sqrt.f32(float %0) #12 + // X64-NEXT: %2 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = extractelement <8 x i1> %3, i64 0 + // X64-NEXT: %5 = select i1 %4, float %1, float %2 + // X64-NEXT: %6 = insertelement <4 x float> %__A, float %5, i64 0 + // X64-NEXT: ret <4 x float> %6 return _mm_mask_sqrt_ss(__W,__U,__A,__B); } __m128 test_mm_mask_sqrt_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_round_ss - // CHECK: call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 {{.*}}, i32 8) + // APPLE-LABEL: test_mm_mask_sqrt_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_sqrt_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_sqrt_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_ss - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: call float @llvm.sqrt.f32(float %{{.*}}) - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: select i1 {{.*}}, float {{.*}}, float {{.*}} - // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float {{.*}}, i64 0 + // APPLE-LABEL: test_mm_maskz_sqrt_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %1 = tail call float @llvm.sqrt.f32(float %0) #12 + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // APPLE-NEXT: %4 = select i1 %3, float %1, float 0.000000e+00 + // APPLE-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // APPLE-NEXT: ret <4 x float> %5 + // X64-LABEL: test_mm_maskz_sqrt_ss + // 
X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %1 = tail call float @llvm.sqrt.f32(float %0) #12 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // X64-NEXT: %4 = select i1 %3, float %1, float 0.000000e+00 + // X64-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // X64-NEXT: ret <4 x float> %5 return _mm_maskz_sqrt_ss(__U,__A,__B); } __m128 test_mm_maskz_sqrt_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_round_ss - // CHECK: call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 {{.*}}, i32 8) + // APPLE-LABEL: test_mm_maskz_sqrt_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 8) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_sqrt_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 8) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_sqrt_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_broadcast_f32x4(float const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_f32x4 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_broadcast_f32x4 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast float* %__A to <4 x float>* + // APPLE-NEXT: %0 = load <4 x float>, <4 x float>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i = shufflevector <4 x float> %0, <4 x float> undef, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_broadcast_f32x4 + // X64: entry: + // X64-NEXT: %__v.i = bitcast float* %__A to <4 x float>* + // X64-NEXT: %0 = load <4 x float>, <4 x float>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i = shufflevector <4 x float> %0, <4 x float> undef, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_broadcast_f32x4(_mm_loadu_ps(__A)); } __m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, float const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_f32x4 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcast_f32x4 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast float* %__A to <4 x float>* + // APPLE-NEXT: %0 = load <4 x float>, <4 x float>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x float> %0, <4 x float> undef, <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %shuffle.i.i, <16 x float> %__O + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_broadcast_f32x4 + // X64: entry: + // X64-NEXT: %__v.i = bitcast float* %__A to <4 x float>* + // X64-NEXT: %0 = load <4 x float>, <4 x float>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %0, <4 x float> undef, <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %shuffle.i.i, <16 x float> %__O + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_broadcast_f32x4(__O, __M, _mm_loadu_ps(__A)); } __m512 
test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x4 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcast_f32x4 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast float* %__A to <4 x float>* + // APPLE-NEXT: %0 = load <4 x float>, <4 x float>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x float> %0, <4 x float> undef, <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_broadcast_f32x4 + // X64: entry: + // X64-NEXT: %__v.i = bitcast float* %__A to <4 x float>* + // X64-NEXT: %0 = load <4 x float>, <4 x float>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %0, <4 x float> undef, <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_broadcast_f32x4(__M, _mm_loadu_ps(__A)); } __m512d test_mm512_broadcast_f64x4(double const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_f64x4 - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_broadcast_f64x4 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast double* %__A to <4 x double>* + // APPLE-NEXT: %0 = load <4 x double>, <4 x double>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i = shufflevector <4 x double> %0, <4 x double> undef, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_broadcast_f64x4 + // X64: entry: + // X64-NEXT: %__v.i = bitcast double* %__A to <4 x double>* + // X64-NEXT: %0 = load <4 x double>, <4 x double>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i = shufflevector <4 x double> %0, <4 x double> undef, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_broadcast_f64x4(_mm256_loadu_pd(__A)); } __m512d test_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, double const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_f64x4 - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcast_f64x4 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast double* %__A to <4 x double>* + // APPLE-NEXT: %0 = load <4 x double>, <4 x double>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x double> %0, <4 x double> undef, <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %shuffle.i.i, <8 x double> %__O + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_broadcast_f64x4 + // X64: entry: + // X64-NEXT: %__v.i = bitcast double* %__A to <4 x double>* + // X64-NEXT: %0 = load <4 x double>, <4 x double>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %0, <4 x double> undef, <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %shuffle.i.i, <8 x double> %__O + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_broadcast_f64x4(__O, __M, _mm256_loadu_pd(__A)); } __m512d 
test_mm512_maskz_broadcast_f64x4(__mmask8 __M, double const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x4 - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcast_f64x4 + // APPLE: entry: + // APPLE-NEXT: %__v.i = bitcast double* %__A to <4 x double>* + // APPLE-NEXT: %0 = load <4 x double>, <4 x double>* %__v.i, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x double> %0, <4 x double> undef, <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_broadcast_f64x4 + // X64: entry: + // X64-NEXT: %__v.i = bitcast double* %__A to <4 x double>* + // X64-NEXT: %0 = load <4 x double>, <4 x double>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %0, <4 x double> undef, <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_broadcast_f64x4(__M, _mm256_loadu_pd(__A)); } __m512i test_mm512_broadcast_i32x4(__m128i const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_i32x4 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_broadcast_i32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // APPLE-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // APPLE-NEXT: %2 = bitcast <16 x i32> %shuffle.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_broadcast_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // X64-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // X64-NEXT: %2 = bitcast <16 x i32> %shuffle.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_broadcast_i32x4(_mm_loadu_si128(__A)); } __m512i test_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_i32x4 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcast_i32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // APPLE-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__O to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuffle.i.i, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_broadcast_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // X64-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__O to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // 
X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %shuffle.i.i, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_broadcast_i32x4(__O, __M, _mm_loadu_si128(__A)); } __m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x4 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcast_i32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // APPLE-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_broadcast_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // X64-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); } __m512i test_mm512_broadcast_i64x4(__m256i const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_i64x4 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_broadcast_i64x4 + // APPLE: entry: + // APPLE-NEXT: %0 = load <4 x i64>, <4 x i64>* %__A, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i = shufflevector <4 x i64> %0, <4 x i64> undef, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %shuffle.i + // X64-LABEL: test_mm512_broadcast_i64x4 + // X64: entry: + // X64-NEXT: %0 = load <4 x i64>, <4 x i64>* %__A, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i = shufflevector <4 x i64> %0, <4 x i64> undef, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_broadcast_i64x4(_mm256_loadu_si256(__A)); } __m512i test_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_i64x4 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcast_i64x4 + // APPLE: entry: + // APPLE-NEXT: %0 = load <4 x i64>, <4 x i64>* %__A, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x i64> %0, <4 x i64> undef, <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %shuffle.i.i, <8 x i64> %__O + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_broadcast_i64x4 + // X64: entry: + // X64-NEXT: %0 = load <4 x i64>, <4 x i64>* %__A, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i64> %0, <4 x i64> undef, <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %shuffle.i.i, <8 x i64> %__O + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_broadcast_i64x4(__O, __M, _mm256_loadu_si256(__A)); } __m512i test_mm512_maskz_broadcast_i64x4(__mmask8 
__M, __m256i const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x4 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcast_i64x4 + // APPLE: entry: + // APPLE-NEXT: %0 = load <4 x i64>, <4 x i64>* %__A, align 1, !tbaa !2 + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x i64> %0, <4 x i64> undef, <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_broadcast_i64x4 + // X64: entry: + // X64-NEXT: %0 = load <4 x i64>, <4 x i64>* %__A, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i64> %0, <4 x i64> undef, <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_broadcast_i64x4(__M, _mm256_loadu_si256(__A)); } __m512d test_mm512_broadcastsd_pd(__m128d __A) { - // CHECK-LABEL: @test_mm512_broadcastsd_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> zeroinitializer + // APPLE-LABEL: test_mm512_broadcastsd_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_broadcastsd_pd + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> zeroinitializer + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_broadcastsd_pd(__A); } __m512d test_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastsd_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcastsd_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__O + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_broadcastsd_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> %__O + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_broadcastsd_pd(__O, __M, __A); } __m512d test_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastsd_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcastsd_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_broadcastsd_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i 
= shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %shuffle.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_broadcastsd_pd(__M, __A); } __m512 test_mm512_broadcastss_ps(__m128 __A) { - // CHECK-LABEL: @test_mm512_broadcastss_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> zeroinitializer + // APPLE-LABEL: test_mm512_broadcastss_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> zeroinitializer + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_broadcastss_ps + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> zeroinitializer + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_broadcastss_ps(__A); } __m512 test_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastss_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcastss_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__O + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_broadcastss_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__O + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_broadcastss_ps(__O, __M, __A); } __m512 test_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastss_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcastss_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_broadcastss_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_broadcastss_ps(__M, __A); } __m512i test_mm512_broadcastd_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm512_broadcastd_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> zeroinitializer + // APPLE-LABEL: test_mm512_broadcastd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // APPLE-NEXT: %shuffle.i = shufflevector <4 x i32> %0, <4 x i32> undef, <16 x i32> zeroinitializer + // APPLE-NEXT: %1 = bitcast <16 x i32> %shuffle.i to <8 x i64> 
+ // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_broadcastd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i = shufflevector <4 x i32> %0, <4 x i32> undef, <16 x i32> zeroinitializer + // X64-NEXT: %1 = bitcast <16 x i32> %shuffle.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_broadcastd_epi32(__A); } __m512i test_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastd_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcastd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <16 x i32> zeroinitializer + // APPLE-NEXT: %1 = bitcast <8 x i64> %__O to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_broadcastd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <16 x i32> zeroinitializer + // X64-NEXT: %1 = bitcast <8 x i64> %__O to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %shuffle.i.i, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_broadcastd_epi32(__O, __M, __A); } __m512i test_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastd_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcastd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // APPLE-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <16 x i32> zeroinitializer + // APPLE-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_broadcastd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <16 x i32> zeroinitializer + // X64-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %shuffle.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_broadcastd_epi32(__M, __A); } __m512i test_mm512_broadcastq_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_broadcastq_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> zeroinitializer + // APPLE-LABEL: test_mm512_broadcastq_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %shuffle.i + // X64-LABEL: test_mm512_broadcastq_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <2 x i64> %__A, <2 x i64> 
undef, <8 x i32> zeroinitializer + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_broadcastq_epi64(__A); } __m512i test_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastq_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_broadcastq_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> %__O + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_broadcastq_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> %__O + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_broadcastq_epi64(__O, __M, __A); } __m512i test_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastq_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_broadcastq_epi64 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_broadcastq_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %shuffle.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_broadcastq_epi64(__M, __A); } __m128i test_mm512_cvtsepi32_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 + // APPLE-LABEL: test_mm512_cvtsepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 -1) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 -1) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_cvtsepi32_epi8(__A); } __m128i test_mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // APPLE-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M) #12 + // APPLE-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> 
+ // APPLE-NEXT: ret <2 x i64> %3 + // X64-LABEL: test_mm512_mask_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M) #12 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm512_mask_cvtsepi32_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 + // APPLE-LABEL: test_mm512_maskz_cvtsepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_maskz_cvtsepi32_epi8(__M, __A); } void test_mm512_mask_cvtsepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi32_storeu_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtsepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtsepi32_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtsepi32_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 + // APPLE-LABEL: test_mm512_cvtsepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 -1) #12 + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 -1) #12 + // X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_cvtsepi32_epi16(__A); } __m256i test_mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // APPLE-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %0, <16 x i16> %1, i16 %__M) #12 + // APPLE-NEXT: %3 = bitcast <16 x i16> %2 to 
<4 x i64> + // APPLE-NEXT: ret <4 x i64> %3 + // X64-LABEL: test_mm512_mask_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // X64-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %0, <16 x i16> %1, i16 %__M) #12 + // X64-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm512_mask_cvtsepi32_epi16(__O, __M, __A); } __m256i test_mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 + // APPLE-LABEL: test_mm512_maskz_cvtsepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_maskz_cvtsepi32_epi16(__M, __A); } void test_mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi32_storeu_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtsepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtsepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtsepi64_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 + // APPLE-LABEL: test_mm512_cvtsepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #12 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_cvtsepi64_epi8(__A); } __m128i test_mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %__A, <16 x i8> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x 
i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %__A, <16 x i8> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_mask_cvtsepi64_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 + // APPLE-LABEL: test_mm512_maskz_cvtsepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_maskz_cvtsepi64_epi8(__M, __A); } void test_mm512_mask_cvtsepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi64_storeu_epi8 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtsepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtsepi64_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtsepi64_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 + // APPLE-LABEL: test_mm512_cvtsepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 -1) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvtsepi64_epi32(__A); } __m256i test_mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__O to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %__A, <8 x i32> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__O to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %__A, <8 x i32> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvtsepi64_epi32(__O, __M, __A); } __m256i test_mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 + // APPLE-LABEL: 
test_mm512_maskz_cvtsepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvtsepi64_epi32(__M, __A); } void test_mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi64_storeu_epi32 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtsepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtsepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm512_cvtsepi64_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 + // APPLE-LABEL: test_mm512_cvtsepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 -1) #12 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_cvtsepi64_epi16(__A); } __m128i test_mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // APPLE-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_mask_cvtsepi64_epi16(__O, __M, __A); } __m128i test_mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 + // APPLE-LABEL: test_mm512_maskz_cvtsepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %__A, <8 x i16> 
zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_maskz_cvtsepi64_epi16(__M, __A); } void test_mm512_mask_cvtsepi64_storeu_epi16(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtsepi64_storeu_epi16 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtsepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtsepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtusepi32_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 + // APPLE-LABEL: test_mm512_cvtusepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 -1) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 -1) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_cvtusepi32_epi8(__A); } __m128i test_mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // APPLE-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M) #12 + // APPLE-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %3 + // X64-LABEL: test_mm512_mask_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M) #12 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm512_mask_cvtusepi32_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 + // APPLE-LABEL: test_mm512_maskz_cvtusepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // 
X64-NEXT: ret <2 x i64> %2 return _mm512_maskz_cvtusepi32_epi8(__M, __A); } void test_mm512_mask_cvtusepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi32_storeu_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtusepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtusepi32_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtusepi32_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 + // APPLE-LABEL: test_mm512_cvtusepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 -1) #12 + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 -1) #12 + // X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_cvtusepi32_epi16(__A); } __m256i test_mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // APPLE-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %0, <16 x i16> %1, i16 %__M) #12 + // APPLE-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %3 + // X64-LABEL: test_mm512_mask_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // X64-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %0, <16 x i16> %1, i16 %__M) #12 + // X64-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm512_mask_cvtusepi32_epi16(__O, __M, __A); } __m256i test_mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 + // APPLE-LABEL: test_mm512_maskz_cvtusepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 %__M) #12 + 
// X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_maskz_cvtusepi32_epi16(__M, __A); } void test_mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi32_storeu_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtusepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtusepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtusepi64_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 + // APPLE-LABEL: test_mm512_cvtusepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #12 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_cvtusepi64_epi8(__A); } __m128i test_mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %__A, <16 x i8> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %__A, <16 x i8> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_mask_cvtusepi64_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 + // APPLE-LABEL: test_mm512_maskz_cvtusepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_maskz_cvtusepi64_epi8(__M, __A); } void test_mm512_mask_cvtusepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi8 - // CHECK: 
@llvm.x86.avx512.mask.pmovus.qb.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi64_storeu_epi8 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtusepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtusepi64_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtusepi64_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 + // APPLE-LABEL: test_mm512_cvtusepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 -1) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvtusepi64_epi32(__A); } __m256i test_mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__O to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %__A, <8 x i32> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__O to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %__A, <8 x i32> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvtusepi64_epi32(__O, __M, __A); } __m256i test_mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 + // APPLE-LABEL: test_mm512_maskz_cvtusepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %__A, <8 x i32> zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvtusepi64_epi32(__M, __A); } void test_mm512_mask_cvtusepi64_storeu_epi32(void* __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi64_storeu_epi32 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtusepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %__P, <8 x i64> %__A, i8 
%__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtusepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm512_cvtusepi64_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 + // APPLE-LABEL: test_mm512_cvtusepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 -1) #12 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_cvtusepi64_epi16(__A); } __m128i test_mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // APPLE-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_mask_cvtusepi64_epi16(__O, __M, __A); } __m128i test_mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 + // APPLE-LABEL: test_mm512_maskz_cvtusepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_maskz_cvtusepi64_epi16(__M, __A); } void test_mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtusepi64_storeu_epi16 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtusepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtusepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtepi32_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_epi8 - // CHECK: trunc <16 x i32> %{{.*}} to <16 x i8> + // APPLE-LABEL: test_mm512_cvtepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = trunc <16 x i32> %0 to <16 x i8> + // 
APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = trunc <16 x i32> %0 to <16 x i8> + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_cvtepi32_epi8(__A); } __m128i test_mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.512 + // APPLE-LABEL: test_mm512_mask_cvtepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // APPLE-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M) #12 + // APPLE-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %3 + // X64-LABEL: test_mm512_mask_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M) #12 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm512_mask_cvtepi32_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.512 + // APPLE-LABEL: test_mm512_maskz_cvtepi32_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_maskz_cvtepi32_epi8(__M, __A); } void test_mm512_mask_cvtepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtepi32_storeu_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtepi32_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtepi32_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_epi16 - // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16> + // APPLE-LABEL: test_mm512_cvtepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = trunc <16 x i32> %0 to <16 x i16> + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x 
i64> %__A to <16 x i32> + // X64-NEXT: %1 = trunc <16 x i32> %0 to <16 x i16> + // X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_cvtepi32_epi16(__A); } __m256i test_mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.512 + // APPLE-LABEL: test_mm512_mask_cvtepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // APPLE-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %0, <16 x i16> %1, i16 %__M) #12 + // APPLE-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %3 + // X64-LABEL: test_mm512_mask_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // X64-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %0, <16 x i16> %1, i16 %__M) #12 + // X64-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm512_mask_cvtepi32_epi16(__O, __M, __A); } __m256i test_mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.512 + // APPLE-LABEL: test_mm512_maskz_cvtepi32_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_maskz_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %0, <16 x i16> zeroinitializer, i16 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_maskz_cvtepi32_epi16(__M, __A); } void test_mm512_mask_cvtepi32_storeu_epi16(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtepi32_storeu_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %__P, <16 x i32> %0, i16 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtepi64_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 + // APPLE-LABEL: test_mm512_cvtepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #12 + // APPLE-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #12 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 
x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_cvtepi64_epi8(__A); } __m128i test_mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 + // APPLE-LABEL: test_mm512_mask_cvtepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // APPLE-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %__A, <16 x i8> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %__A, <16 x i8> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm512_mask_cvtepi64_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 + // APPLE-LABEL: test_mm512_maskz_cvtepi64_epi8 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_maskz_cvtepi64_epi8(__M, __A); } void test_mm512_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtepi64_storeu_epi8 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtepi64_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtepi64_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi64_epi32 - // CHECK: trunc <8 x i64> %{{.*}} to <8 x i32> + // APPLE-LABEL: test_mm512_cvtepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = trunc <8 x i64> %__A to <8 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = trunc <8 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvtepi64_epi32(__A); } __m256i test_mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi32 - // CHECK: trunc <8 x i64> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__O to <8 x i32> + // APPLE-NEXT: %1 = trunc <8 x i64> %__A to <8 x i32> + // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %0 + // APPLE-NEXT: 
%4 = bitcast <8 x i32> %3 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %4 + // X64-LABEL: test_mm512_mask_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__O to <8 x i32> + // X64-NEXT: %1 = trunc <8 x i64> %__A to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %0 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm512_mask_cvtepi64_epi32(__O, __M, __A); } __m256i test_mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi32 - // CHECK: trunc <8 x i64> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi64_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = trunc <8 x i64> %__A to <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %3 + // X64-LABEL: test_mm512_maskz_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = trunc <8 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm512_maskz_cvtepi64_epi32(__M, __A); } void test_mm512_mask_cvtepi64_storeu_epi32(void* __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtepi64_storeu_epi32 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm512_cvtepi64_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi64_epi16 - // CHECK: trunc <8 x i64> %{{.*}} to <8 x i16> + // APPLE-LABEL: test_mm512_cvtepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = trunc <8 x i64> %__A to <8 x i16> + // APPLE-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = trunc <8 x i64> %__A to <8 x i16> + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_cvtepi64_epi16(__A); } __m128i test_mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.512 + // APPLE-LABEL: test_mm512_mask_cvtepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // APPLE-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M) #12 + // APPLE-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M) #12 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 
return _mm512_mask_cvtepi64_epi16(__O, __M, __A); } __m128i test_mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.512 + // APPLE-LABEL: test_mm512_maskz_cvtepi64_epi16 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #12 + // APPLE-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #12 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_maskz_cvtepi64_epi16(__M, __A); } void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.mem.512 + // APPLE-LABEL: test_mm512_mask_cvtepi64_storeu_epi16 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_cvtepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %__P, <8 x i64> %__A, i8 %__M) #12 + // X64-NEXT: ret void return _mm512_mask_cvtepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm512_extracti32x4_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_extracti32x4_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> + // APPLE-LABEL: test_mm512_extracti32x4_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %extract = shufflevector <16 x i32> %0, <16 x i32> undef, <4 x i32> + // APPLE-NEXT: %1 = bitcast <4 x i32> %extract to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %1 + // X64-LABEL: test_mm512_extracti32x4_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %extract = shufflevector <16 x i32> %0, <16 x i32> undef, <4 x i32> + // X64-NEXT: %1 = bitcast <4 x i32> %extract to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm512_extracti32x4_epi32(__A, 3); } __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_extracti32x4_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // APPLE-NEXT: %extract = shufflevector <16 x i32> %0, <16 x i32> undef, <4 x i32> + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %3 = select <4 x i1> %extract1, <4 x i32> %extract, <4 x i32> %1 + // APPLE-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %4 + // X64-LABEL: test_mm512_mask_extracti32x4_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %extract = shufflevector <16 x i32> %0, <16 x i32> undef, <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %2, <8 
x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract1, <4 x i32> %extract, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3); } __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_extracti32x4_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %extract = shufflevector <16 x i32> %0, <16 x i32> undef, <4 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %2 = select <4 x i1> %extract1, <4 x i32> %extract, <4 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // APPLE-NEXT: ret <2 x i64> %3 + // X64-LABEL: test_mm512_maskz_extracti32x4_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %extract = shufflevector <16 x i32> %0, <16 x i32> undef, <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract1, <4 x i32> %extract, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm512_maskz_extracti32x4_epi32(__U, __A, 3); } __m256i test_mm512_extracti64x4_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_extracti64x4_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> + // APPLE-LABEL: test_mm512_extracti64x4_epi64 + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: ret <4 x i64> %extract + // X64-LABEL: test_mm512_extracti64x4_epi64 + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: ret <4 x i64> %extract return _mm512_extracti64x4_epi64(__A, 1); } __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_extracti64x4_epi64 + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %1 = select <4 x i1> %extract1, <4 x i64> %extract, <4 x i64> %__W + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_mask_extracti64x4_epi64 + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x i64> %extract, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1); } __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> 
undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_extracti64x4_epi64 + // APPLE: entry: + // APPLE-NEXT: %extract = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %1 = select <4 x i1> %extract1, <4 x i64> %extract, <4 x i64> zeroinitializer + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_extracti64x4_epi64 + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x i64> %extract, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_extracti64x4_epi64(__U, __A, 1); } __m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) { - // CHECK-LABEL: @test_mm512_insertf64x4 - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_insertf64x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x double> %__B, <4 x double> undef, <8 x i32> + // APPLE-NEXT: %insert = shufflevector <8 x double> %__A, <8 x double> %widen, <8 x i32> + // APPLE-NEXT: ret <8 x double> %insert + // X64-LABEL: test_mm512_insertf64x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x double> %__B, <4 x double> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x double> %__A, <8 x double> %widen, <8 x i32> + // X64-NEXT: ret <8 x double> %insert return _mm512_insertf64x4(__A, __B, 1); } __m512d test_mm512_mask_insertf64x4(__m512d __W, __mmask8 __U, __m512d __A, __m256d __B) { - // CHECK-LABEL: @test_mm512_mask_insertf64x4 - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_insertf64x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x double> %__B, <4 x double> undef, <8 x i32> + // APPLE-NEXT: %insert = shufflevector <8 x double> %__A, <8 x double> %widen, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %insert, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_insertf64x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x double> %__B, <4 x double> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x double> %__A, <8 x double> %widen, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %insert, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_insertf64x4(__W, __U, __A, __B, 1); } __m512d test_mm512_maskz_insertf64x4(__mmask8 __U, __m512d __A, __m256d __B) { - // CHECK-LABEL: @test_mm512_maskz_insertf64x4 - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_insertf64x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x double> %__B, <4 x double> undef, <8 x i32> + // APPLE-NEXT: %insert = shufflevector <8 x double> %__A, <8 x double> %widen, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> 
%insert, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_insertf64x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x double> %__B, <4 x double> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x double> %__A, <8 x double> %widen, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %insert, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_insertf64x4(__U, __A, __B, 1); } __m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) { - // CHECK-LABEL: @test_mm512_inserti64x4 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_inserti64x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x i64> %__B, <4 x i64> undef, <8 x i32> + // APPLE-NEXT: %insert = shufflevector <8 x i64> %__A, <8 x i64> %widen, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %insert + // X64-LABEL: test_mm512_inserti64x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x i64> %__B, <4 x i64> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x i64> %__A, <8 x i64> %widen, <8 x i32> + // X64-NEXT: ret <8 x i64> %insert return _mm512_inserti64x4(__A, __B, 1); } __m512i test_mm512_mask_inserti64x4(__m512i __W, __mmask8 __U, __m512i __A, __m256i __B) { - // CHECK-LABEL: @test_mm512_mask_inserti64x4 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_inserti64x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x i64> %__B, <4 x i64> undef, <8 x i32> + // APPLE-NEXT: %insert = shufflevector <8 x i64> %__A, <8 x i64> %widen, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %insert, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_inserti64x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x i64> %__B, <4 x i64> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x i64> %__A, <8 x i64> %widen, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %insert, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_inserti64x4(__W, __U, __A, __B, 1); } __m512i test_mm512_maskz_inserti64x4(__mmask8 __U, __m512i __A, __m256i __B) { - // CHECK-LABEL: @test_mm512_maskz_inserti64x4 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_inserti64x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x i64> %__B, <4 x i64> undef, <8 x i32> + // APPLE-NEXT: %insert = shufflevector <8 x i64> %__A, <8 x i64> %widen, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %insert, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_inserti64x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x i64> %__B, <4 x i64> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x i64> %__A, <8 x i64> %widen, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %insert, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_inserti64x4(__U, __A, __B, 1); } __m512 
test_mm512_insertf32x4(__m512 __A, __m128 __B) { - // CHECK-LABEL: @test_mm512_insertf32x4 - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_insertf32x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <16 x i32> + // APPLE-NEXT: %insert = shufflevector <16 x float> %__A, <16 x float> %widen, <16 x i32> + // APPLE-NEXT: ret <16 x float> %insert + // X64-LABEL: test_mm512_insertf32x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <16 x i32> + // X64-NEXT: %insert = shufflevector <16 x float> %__A, <16 x float> %widen, <16 x i32> + // X64-NEXT: ret <16 x float> %insert return _mm512_insertf32x4(__A, __B, 1); } __m512 test_mm512_mask_insertf32x4(__m512 __W, __mmask16 __U, __m512 __A, __m128 __B) { - // CHECK-LABEL: @test_mm512_mask_insertf32x4 - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_insertf32x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <16 x i32> + // APPLE-NEXT: %insert = shufflevector <16 x float> %__A, <16 x float> %widen, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %insert, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_insertf32x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <16 x i32> + // X64-NEXT: %insert = shufflevector <16 x float> %__A, <16 x float> %widen, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %insert, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_insertf32x4(__W, __U, __A, __B, 1); } __m512 test_mm512_maskz_insertf32x4(__mmask16 __U, __m512 __A, __m128 __B) { - // CHECK-LABEL: @test_mm512_maskz_insertf32x4 - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_insertf32x4 + // APPLE: entry: + // APPLE-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <16 x i32> + // APPLE-NEXT: %insert = shufflevector <16 x float> %__A, <16 x float> %widen, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %insert, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_insertf32x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <16 x i32> + // X64-NEXT: %insert = shufflevector <16 x float> %__A, <16 x float> %widen, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %insert, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_insertf32x4(__U, __A, __B, 1); } __m512i test_mm512_inserti32x4(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_inserti32x4 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_inserti32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, 
<16 x i32> + // APPLE-NEXT: %insert = shufflevector <16 x i32> %0, <16 x i32> %widen, <16 x i32> + // APPLE-NEXT: %2 = bitcast <16 x i32> %insert to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_inserti32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // X64-NEXT: %insert = shufflevector <16 x i32> %0, <16 x i32> %widen, <16 x i32> + // X64-NEXT: %2 = bitcast <16 x i32> %insert to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_inserti32x4(__A, __B, 1); } __m512i test_mm512_mask_inserti32x4(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_inserti32x4 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_inserti32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // APPLE-NEXT: %insert = shufflevector <16 x i32> %0, <16 x i32> %widen, <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %insert, <16 x i32> %2 + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_mask_inserti32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // X64-NEXT: %insert = shufflevector <16 x i32> %0, <16 x i32> %widen, <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %insert, <16 x i32> %2 + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_mask_inserti32x4(__W, __U, __A, __B, 1); } __m512i test_mm512_maskz_inserti32x4(__mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_inserti32x4 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_inserti32x4 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // APPLE-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // APPLE-NEXT: %insert = shufflevector <16 x i32> %0, <16 x i32> %widen, <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %insert, <16 x i32> zeroinitializer + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_inserti32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> + // X64-NEXT: %insert = shufflevector <16 x i32> %0, <16 x i32> %widen, <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> 
%insert, <16 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_inserti32x4(__U, __A, __B, 1); } __m512d test_mm512_getmant_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getmant_round_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + // APPLE-LABEL: test_mm512_getmant_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_getmant_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_getmant_round_pd(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_mask_getmant_round_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_round_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + // APPLE-LABEL: test_mm512_mask_getmant_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_getmant_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_getmant_round_pd(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_maskz_getmant_round_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_round_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + // APPLE-LABEL: test_mm512_maskz_getmant_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_getmant_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_getmant_round_pd(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_getmant_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + // APPLE-LABEL: test_mm512_getmant_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_getmant_pd(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512d test_mm512_mask_getmant_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + // APPLE-LABEL: test_mm512_mask_getmant_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, 
i32 9, <8 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_getmant_pd(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512d test_mm512_maskz_getmant_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + // APPLE-LABEL: test_mm512_maskz_getmant_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %__A, i32 9, <8 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_getmant_pd(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512 test_mm512_getmant_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_getmant_round_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + // APPLE-LABEL: test_mm512_getmant_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 -1, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_getmant_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 -1, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_getmant_round_ps(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_mask_getmant_round_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_round_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + // APPLE-LABEL: test_mm512_mask_getmant_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> %__W, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_getmant_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> %__W, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_getmant_round_ps(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_getmant_round_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_round_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + // APPLE-LABEL: test_mm512_maskz_getmant_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_getmant_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_getmant_round_ps(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_getmant_ps(__m512 __A) { - // 
CHECK-LABEL: @test_mm512_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + // APPLE-LABEL: test_mm512_getmant_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 -1, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 -1, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_getmant_ps(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512 test_mm512_mask_getmant_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + // APPLE-LABEL: test_mm512_mask_getmant_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> %__W, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> %__W, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_getmant_ps(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512 test_mm512_maskz_getmant_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + // APPLE-LABEL: test_mm512_maskz_getmant_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %__A, i32 9, <16 x float> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_getmant_ps(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512d test_mm512_getexp_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getexp_round_pd - // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + // APPLE-LABEL: test_mm512_getexp_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_getexp_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_getexp_round_pd(__A, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_mask_getexp_round_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getexp_round_pd - // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + // APPLE-LABEL: test_mm512_mask_getexp_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_getexp_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_getexp_round_pd(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); 
} __m512d test_mm512_maskz_getexp_round_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_round_pd - // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + // APPLE-LABEL: test_mm512_maskz_getexp_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_getexp_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_getexp_round_pd(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_getexp_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getexp_pd - // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + // APPLE-LABEL: test_mm512_getexp_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_getexp_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_getexp_pd(__A); } __m512d test_mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getexp_pd - // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + // APPLE-LABEL: test_mm512_mask_getexp_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_getexp_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_getexp_pd(__W, __U, __A); } __m512d test_mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_pd - // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + // APPLE-LABEL: test_mm512_maskz_getexp_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_getexp_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_getexp_pd(__U, __A); } __m512 test_mm512_getexp_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_getexp_round_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + // APPLE-LABEL: test_mm512_getexp_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 -1, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_getexp_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 -1, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_getexp_round_ps(__A, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_mask_getexp_round_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // 
CHECK-LABEL: @test_mm512_mask_getexp_round_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + // APPLE-LABEL: test_mm512_mask_getexp_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_getexp_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_getexp_round_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_getexp_round_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_round_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + // APPLE-LABEL: test_mm512_maskz_getexp_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_getexp_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_getexp_round_ps(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_getexp_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_getexp_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + // APPLE-LABEL: test_mm512_getexp_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 -1, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_getexp_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 -1, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_getexp_ps(__A); } __m512 test_mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_getexp_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + // APPLE-LABEL: test_mm512_mask_getexp_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_getexp_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_getexp_ps(__W, __U, __A); } __m512 test_mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + // APPLE-LABEL: test_mm512_maskz_getexp_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_getexp_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_getexp_ps(__U, __A); } __m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_ps - // CHECK: 
@llvm.x86.avx512.mask.gather.qps.512 + // APPLE-LABEL: test_mm512_i64gather_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // APPLE-NEXT: ret <8 x float> %0 + // X64-LABEL: test_mm512_i64gather_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // X64-NEXT: ret <8 x float> %0 return _mm512_i64gather_ps(__index, __addr, 2); } __m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_ps - // CHECK: @llvm.x86.avx512.mask.gather.qps.512 + // APPLE-LABEL: test_mm512_mask_i64gather_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %__v1_old, i8* %__addr, <8 x i64> %__index, <8 x i1> %0, i32 2) + // APPLE-NEXT: ret <8 x float> %1 + // X64-LABEL: test_mm512_mask_i64gather_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %__v1_old, i8* %__addr, <8 x i64> %__index, <8 x i1> %0, i32 2) + // X64-NEXT: ret <8 x float> %1 return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 + // APPLE-LABEL: test_mm512_i64gather_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_i64gather_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_i64gather_epi32(__index, __addr, 2); } __m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 + // APPLE-LABEL: test_mm512_mask_i64gather_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__v1_old to <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32> %0, i8* %__addr, <8 x i64> %__index, <8 x i1> %1, i32 2) + // APPLE-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %3 + // X64-LABEL: test_mm512_mask_i64gather_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__v1_old to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32> %0, i8* %__addr, <8 x i64> %__index, <8 x i1> %1, i32 2) + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_pd 
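// Editor's note (illustrative sketch, not from the original patch): every gather test
// above passes a literal scale of 2, so lane i is loaded from base + 2 * index[i] bytes.
// The unmasked wrappers lower with a zeroinitializer pass-through and an all-true
// predicate, while the _mask_ wrappers first bitcast the __mmask8/__mmask16 argument to
// <8 x i1>/<16 x i1>. A hypothetical helper showing both call shapes (assumes AVX-512F
// is enabled and the intrinsics header is in scope):
static inline __m256 gather_f32_qword_idx(const void *base, __m512i idx,
                                          __mmask8 keep, __m256 fallback) {
  // All lanes loaded: lowers to the gather intrinsic with an all-true <8 x i1> mask.
  __m256 all_lanes = _mm512_i64gather_ps(idx, base, 2);
  // Masked form: lanes whose bit in `keep` is clear keep the value from `fallback`.
  __m256 some_lanes = _mm512_mask_i64gather_ps(fallback, keep, idx, base, 2);
  return keep == (__mmask8)0xFF ? all_lanes : some_lanes;
}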
- // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 + // APPLE-LABEL: test_mm512_i64gather_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_i64gather_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // X64-NEXT: ret <8 x double> %0 return _mm512_i64gather_pd(__index, __addr, 2); } __m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_pd - // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 + // APPLE-LABEL: test_mm512_mask_i64gather_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> %__v1_old, i8* %__addr, <8 x i64> %__index, <8 x i1> %0, i32 2) + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_i64gather_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> %__v1_old, i8* %__addr, <8 x i64> %__index, <8 x i1> %0, i32 2) + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 + // APPLE-LABEL: test_mm512_i64gather_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_i64gather_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64> zeroinitializer, i8* %__addr, <8 x i64> %__index, <8 x i1> , i32 2) + // X64-NEXT: ret <8 x i64> %0 return _mm512_i64gather_epi64(__index, __addr, 2); } __m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 + // APPLE-LABEL: test_mm512_mask_i64gather_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64> %__v1_old, i8* %__addr, <8 x i64> %__index, <8 x i1> %0, i32 2) + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_i64gather_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64> %__v1_old, i8* %__addr, <8 x i64> %__index, <8 x i1> %0, i32 2) + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_ps - // CHECK: @llvm.x86.avx512.mask.gather.dps.512 + // APPLE-LABEL: test_mm512_i32gather_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> zeroinitializer, i8* %__addr, <16 x 
i32> %0, <16 x i1> , i32 2) + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_i32gather_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> zeroinitializer, i8* %__addr, <16 x i32> %0, <16 x i1> , i32 2) + // X64-NEXT: ret <16 x float> %1 return _mm512_i32gather_ps(__index, __addr, 2); } __m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_ps - // CHECK: @llvm.x86.avx512.mask.gather.dps.512 + // APPLE-LABEL: test_mm512_mask_i32gather_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__mask to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %v1_old, i8* %__addr, <16 x i32> %0, <16 x i1> %1, i32 2) + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_i32gather_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__mask to <16 x i1> + // X64-NEXT: %2 = tail call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %v1_old, i8* %__addr, <16 x i32> %0, <16 x i1> %1, i32 2) + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 + // APPLE-LABEL: test_mm512_i32gather_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32> zeroinitializer, i8* %__addr, <16 x i32> %0, <16 x i1> , i32 2) + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_i32gather_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32> zeroinitializer, i8* %__addr, <16 x i32> %0, <16 x i1> , i32 2) + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_i32gather_epi32(__index, __addr, 2); } __m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 + // APPLE-LABEL: test_mm512_mask_i32gather_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__v1_old to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__mask to <16 x i1> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32> %0, i8* %__addr, <16 x i32> %1, <16 x i1> %2, i32 2) + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_i32gather_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__v1_old to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__index to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__mask to <16 x i1> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32> %0, i8* %__addr, <16 x i32> %1, <16 x i1> %2, i32 2) + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 
x i64> %4 return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_pd - // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 + // APPLE-LABEL: test_mm512_i32gather_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> zeroinitializer, i8* %__addr, <8 x i32> %0, <8 x i1> , i32 2) + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_i32gather_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> zeroinitializer, i8* %__addr, <8 x i32> %0, <8 x i1> , i32 2) + // X64-NEXT: ret <8 x double> %1 return _mm512_i32gather_pd(__index, __addr, 2); } __m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_pd - // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 + // APPLE-LABEL: test_mm512_mask_i32gather_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> %__v1_old, i8* %__addr, <8 x i32> %0, <8 x i1> %1, i32 2) + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_i32gather_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %2 = tail call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> %__v1_old, i8* %__addr, <8 x i32> %0, <8 x i1> %1, i32 2) + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 + // APPLE-LABEL: test_mm512_i32gather_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64> zeroinitializer, i8* %__addr, <8 x i32> %0, <8 x i1> , i32 2) + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_i32gather_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64> zeroinitializer, i8* %__addr, <8 x i32> %0, <8 x i1> , i32 2) + // X64-NEXT: ret <8 x i64> %1 return _mm512_i32gather_epi64(__index, __addr, 2); } __m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 + // APPLE-LABEL: test_mm512_mask_i32gather_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64> %__v1_old, i8* %__addr, <8 x i32> %0, <8 x i1> %1, i32 2) + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_i32gather_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %2 = tail call 
<8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64> %__v1_old, i8* %__addr, <8 x i32> %0, <8 x i1> %1, i32 2) + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) { - // CHECK-LABEL: @test_mm512_i64scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatter.qps.512 + // APPLE-LABEL: test_mm512_i64scatter_ps + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x float> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i64scatter_ps + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_i64scatter_ps(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatter.qps.512 + // APPLE-LABEL: test_mm512_mask_i64scatter_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %__addr, <8 x i1> %0, <8 x i64> %__index, <8 x float> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i64scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %__addr, <8 x i1> %0, <8 x i64> %__index, <8 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2); } void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm512_i64scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512 + // APPLE-LABEL: test_mm512_i64scatter_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__v1 to <8 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpi.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x i32> %0, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i64scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__v1 to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpi.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x i32> %0, i32 2) + // X64-NEXT: ret void return _mm512_i64scatter_epi32(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512 + // APPLE-LABEL: test_mm512_mask_i64scatter_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__v1 to <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpi.512(i8* %__addr, <8 x i1> %1, <8 x i64> %__index, <8 x i32> %0, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i64scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__v1 to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpi.512(i8* %__addr, <8 x i1> %1, <8 x i64> %__index, <8 x i32> %0, i32 2) + // X64-NEXT: ret void return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2); } void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d 
__v1) { - // CHECK-LABEL: @test_mm512_i64scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512 + // APPLE-LABEL: test_mm512_i64scatter_pd + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x double> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i64scatter_pd + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_i64scatter_pd(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512 + // APPLE-LABEL: test_mm512_mask_i64scatter_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %__addr, <8 x i1> %0, <8 x i64> %__index, <8 x double> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i64scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %__addr, <8 x i1> %0, <8 x i64> %__index, <8 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2); } void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_i64scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512 + // APPLE-LABEL: test_mm512_i64scatter_epi64 + // APPLE: entry: + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpq.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x i64> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i64scatter_epi64 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpq.512(i8* %__addr, <8 x i1> , <8 x i64> %__index, <8 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_i64scatter_epi64(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512 + // APPLE-LABEL: test_mm512_mask_i64scatter_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpq.512(i8* %__addr, <8 x i1> %0, <8 x i64> %__index, <8 x i64> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i64scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.qpq.512(i8* %__addr, <8 x i1> %0, <8 x i64> %__index, <8 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatter.dps.512 + // APPLE-LABEL: test_mm512_i32scatter_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %__addr, <16 x i1> , <16 x i32> %0, <16 x float> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i32scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> 
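// Editor's note (illustrative sketch, not from the original patch): the scatter tests
// follow the same pattern as the gathers: __m512i/__m256i value and index operands are
// bitcast to the lane type the intrinsic works on (e.g. <16 x i32> for the dword forms),
// the __mmask argument becomes the <N x i1> write predicate, and the literal 2 is again
// the byte scale. A hypothetical helper (assumes AVX-512F and the intrinsics header):
static inline void scatter_f32_dword_idx(void *base, __m512i idx,
                                         __mmask16 keep, __m512 vals) {
  // Only lanes with a set bit in `keep` are stored, each to base + 2 * idx[i] bytes.
  _mm512_mask_i32scatter_ps(base, keep, idx, vals, 2);
}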
+ // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %__addr, <16 x i1> , <16 x i32> %0, <16 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_i32scatter_ps(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatter.dps.512 + // APPLE-LABEL: test_mm512_mask_i32scatter_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__mask to <16 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %__addr, <16 x i1> %1, <16 x i32> %0, <16 x float> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i32scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__mask to <16 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %__addr, <16 x i1> %1, <16 x i32> %0, <16 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512 + // APPLE-LABEL: test_mm512_i32scatter_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__v1 to <16 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpi.512(i8* %__addr, <16 x i1> , <16 x i32> %0, <16 x i32> %1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i32scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__v1 to <16 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpi.512(i8* %__addr, <16 x i1> , <16 x i32> %0, <16 x i32> %1, i32 2) + // X64-NEXT: ret void return _mm512_i32scatter_epi32(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512 + // APPLE-LABEL: test_mm512_mask_i32scatter_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__v1 to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__mask to <16 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpi.512(i8* %__addr, <16 x i1> %2, <16 x i32> %0, <16 x i32> %1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i32scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__index to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__v1 to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__mask to <16 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpi.512(i8* %__addr, <16 x i1> %2, <16 x i32> %0, <16 x i32> %1, i32 2) + // X64-NEXT: ret void return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512 + // APPLE-LABEL: test_mm512_i32scatter_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %__addr, <8 x 
i1> , <8 x i32> %0, <8 x double> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i32scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %__addr, <8 x i1> , <8 x i32> %0, <8 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_i32scatter_pd(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512 + // APPLE-LABEL: test_mm512_mask_i32scatter_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %__addr, <8 x i1> %1, <8 x i32> %0, <8 x double> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i32scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %__addr, <8 x i1> %1, <8 x i32> %0, <8 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_epi64(void *__addr, __m256i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512 + // APPLE-LABEL: test_mm512_i32scatter_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpq.512(i8* %__addr, <8 x i1> , <8 x i32> %0, <8 x i64> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_i32scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpq.512(i8* %__addr, <8 x i1> , <8 x i32> %0, <8 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_i32scatter_epi64(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_epi64(void *__addr, __mmask8 __mask, __m256i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512 + // APPLE-LABEL: test_mm512_mask_i32scatter_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // APPLE-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // APPLE-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpq.512(i8* %__addr, <8 x i1> %1, <8 x i32> %0, <8 x i64> %__v1, i32 2) + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_i32scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatter.dpq.512(i8* %__addr, <8 x i1> %1, <8 x i32> %0, <8 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm512_mask_i32scatter_epi64(__addr, __mask, __index, __v1, 2); } __m128d test_mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_rsqrt14_sd - // CHECK: @llvm.x86.avx512.rsqrt14.sd + // APPLE-LABEL: test_mm_mask_rsqrt14_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_rsqrt14_sd + 
// X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_rsqrt14_sd(__W, __U, __A, __B); } __m128d test_mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_rsqrt14_sd - // CHECK: @llvm.x86.avx512.rsqrt14.sd + // APPLE-LABEL: test_mm_maskz_rsqrt14_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_rsqrt14_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_rsqrt14_sd(__U, __A, __B); } __m128 test_mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_rsqrt14_ss - // CHECK: @llvm.x86.avx512.rsqrt14.ss + // APPLE-LABEL: test_mm_mask_rsqrt14_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_rsqrt14_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_rsqrt14_ss(__W, __U, __A, __B); } __m128 test_mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_rsqrt14_ss - // CHECK: @llvm.x86.avx512.rsqrt14.ss + // APPLE-LABEL: test_mm_maskz_rsqrt14_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_rsqrt14_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_rsqrt14_ss(__U, __A, __B); } __m512d test_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.512 + // APPLE-LABEL: test_mm512_mask_rcp14_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %__A, <8 x double> %__W, i8 %__U) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_rcp14_pd (__W,__U,__A); } __m512d test_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.512 + // APPLE-LABEL: test_mm512_maskz_rcp14_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %__A, <8 x double> zeroinitializer, i8 %__U) #12 + // 
X64-NEXT: ret <8 x double> %0 return _mm512_maskz_rcp14_pd (__U,__A); } __m512 test_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.512 + // APPLE-LABEL: test_mm512_mask_rcp14_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %__A, <16 x float> %__W, i16 %__U) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_rcp14_ps (__W,__U,__A); } __m512 test_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.512 + // APPLE-LABEL: test_mm512_maskz_rcp14_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %__A, <16 x float> zeroinitializer, i16 %__U) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_rcp14_ps (__U,__A); } __m128d test_mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_rcp14_sd - // CHECK: @llvm.x86.avx512.rcp14.sd + // APPLE-LABEL: test_mm_mask_rcp14_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_rcp14_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_rcp14_sd(__W, __U, __A, __B); } __m128d test_mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_rcp14_sd - // CHECK: @llvm.x86.avx512.rcp14.sd + // APPLE-LABEL: test_mm_maskz_rcp14_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_rcp14_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_rcp14_sd(__U, __A, __B); } __m128 test_mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_rcp14_ss - // CHECK: @llvm.x86.avx512.rcp14.ss + // APPLE-LABEL: test_mm_mask_rcp14_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_rcp14_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_rcp14_ss(__W, __U, __A, __B); } __m128 test_mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_rcp14_ss - // CHECK: 
@llvm.x86.avx512.rcp14.ss + // APPLE-LABEL: test_mm_maskz_rcp14_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_rcp14_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_rcp14_ss(__U, __A, __B); } __m128d test_mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getexp_sd - // CHECK: @llvm.x86.avx512.mask.getexp.sd + // APPLE-LABEL: test_mm_mask_getexp_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_getexp_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_getexp_sd(__W, __U, __A, __B); } __m128d test_mm_mask_getexp_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getexp_round_sd - // CHECK: @llvm.x86.avx512.mask.getexp.sd + // APPLE-LABEL: test_mm_mask_getexp_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_getexp_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_getexp_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_sd - // CHECK: @llvm.x86.avx512.mask.getexp.sd + // APPLE-LABEL: test_mm_maskz_getexp_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_getexp_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_getexp_sd(__U, __A, __B); } __m128d test_mm_maskz_getexp_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_round_sd - // CHECK: @llvm.x86.avx512.mask.getexp.sd + // APPLE-LABEL: test_mm_maskz_getexp_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_getexp_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_getexp_round_sd(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 
test_mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getexp_ss - // CHECK: @llvm.x86.avx512.mask.getexp.ss + // APPLE-LABEL: test_mm_mask_getexp_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_getexp_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_getexp_ss(__W, __U, __A, __B); } __m128 test_mm_mask_getexp_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getexp_round_ss - // CHECK: @llvm.x86.avx512.mask.getexp.ss + // APPLE-LABEL: test_mm_mask_getexp_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_getexp_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_getexp_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_ss - // CHECK: @llvm.x86.avx512.mask.getexp.ss + // APPLE-LABEL: test_mm_maskz_getexp_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_getexp_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_getexp_ss(__U, __A, __B); } __m128 test_mm_maskz_getexp_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_round_ss - // CHECK: @llvm.x86.avx512.mask.getexp.ss + // APPLE-LABEL: test_mm_maskz_getexp_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_getexp_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_getexp_round_ss(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_mask_getmant_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getmant_sd - // CHECK: @llvm.x86.avx512.mask.getmant.sd + // APPLE-LABEL: test_mm_mask_getmant_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_getmant_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> %__W, i8 %__U, i32 4) + 
// X64-NEXT: ret <2 x double> %0 return _mm_mask_getmant_sd(__W, __U, __A, __B, 1, 2); } __m128d test_mm_mask_getmant_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getmant_round_sd - // CHECK: @llvm.x86.avx512.mask.getmant.sd + // APPLE-LABEL: test_mm_mask_getmant_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_getmant_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_getmant_round_sd(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_maskz_getmant_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_sd - // CHECK: @llvm.x86.avx512.mask.getmant.sd + // APPLE-LABEL: test_mm_maskz_getmant_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_getmant_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_getmant_sd(__U, __A, __B, 1, 2); } __m128d test_mm_maskz_getmant_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_round_sd - // CHECK: @llvm.x86.avx512.mask.getmant.sd + // APPLE-LABEL: test_mm_maskz_getmant_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_getmant_round_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %__A, <2 x double> %__B, i32 9, <2 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_getmant_round_sd(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_mask_getmant_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getmant_ss - // CHECK: @llvm.x86.avx512.mask.getmant.ss + // APPLE-LABEL: test_mm_mask_getmant_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_getmant_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_getmant_ss(__W, __U, __A, __B, 1, 2); } __m128 test_mm_mask_getmant_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getmant_round_ss - // CHECK: @llvm.x86.avx512.mask.getmant.ss + // APPLE-LABEL: test_mm_mask_getmant_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> %__W, i8 %__U, i32 4) + // 
APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_getmant_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_getmant_round_ss(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_maskz_getmant_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_ss - // CHECK: @llvm.x86.avx512.mask.getmant.ss + // APPLE-LABEL: test_mm_maskz_getmant_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_getmant_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_getmant_ss(__U, __A, __B, 1, 2); } __m128 test_mm_maskz_getmant_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_round_ss - // CHECK: @llvm.x86.avx512.mask.getmant.ss + // APPLE-LABEL: test_mm_maskz_getmant_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_getmant_round_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %__A, <4 x float> %__B, i32 9, <4 x float> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_getmant_round_ss(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_ss - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmadd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #12 + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, float %3, float %0 + // APPLE-NEXT: %7 = insertelement <4 x float> %__W, float %6, i64 0 + // APPLE-NEXT: ret <4 x float> %7 + // X64-LABEL: test_mm_mask_fmadd_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #12 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = 
extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, float %3, float %0 + // X64-NEXT: %7 = insertelement <4 x float> %__W, float %6, i64 0 + // X64-NEXT: ret <4 x float> %7 return _mm_mask_fmadd_ss(__W, __U, __A, __B); } __m128 test_mm_fmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fmadd_round_ss - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // APPLE-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // APPLE-NEXT: ret <4 x float> %4 + // X64-LABEL: test_mm_fmadd_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // X64-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // X64-NEXT: ret <4 x float> %4 return _mm_fmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_round_ss - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, float %3, float %0 + // APPLE-NEXT: %7 = insertelement <4 x float> %__W, float %6, i64 0 + // APPLE-NEXT: ret <4 x float> %7 + // X64-LABEL: test_mm_mask_fmadd_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, float %3, 
float %0 + // X64-NEXT: %7 = insertelement <4 x float> %__W, float %6, i64 0 + // X64-NEXT: ret <4 x float> %7 return _mm_mask_fmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_ss - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmadd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #12 + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, float %3, float 0.000000e+00 + // APPLE-NEXT: %7 = insertelement <4 x float> %__A, float %6, i64 0 + // APPLE-NEXT: ret <4 x float> %7 + // X64-LABEL: test_mm_maskz_fmadd_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #12 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, float %3, float 0.000000e+00 + // X64-NEXT: %7 = insertelement <4 x float> %__A, float %6, i64 0 + // X64-NEXT: ret <4 x float> %7 return _mm_maskz_fmadd_ss(__U, __A, __B, __C); } __m128 test_mm_maskz_fmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_round_ss - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, float %3, float 0.000000e+00 + // APPLE-NEXT: %7 = insertelement <4 x float> %__A, float %6, i64 0 + // APPLE-NEXT: ret <4 x float> %7 + // X64-LABEL: test_mm_maskz_fmadd_round_ss + // 
X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, float %3, float 0.000000e+00 + // X64-NEXT: %7 = insertelement <4 x float> %__A, float %6, i64 0 + // X64-NEXT: ret <4 x float> %7 return _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmadd_ss - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmadd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #12 + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, float %3, float %2 + // APPLE-NEXT: %7 = insertelement <4 x float> %__Y, float %6, i64 0 + // APPLE-NEXT: ret <4 x float> %7 + // X64-LABEL: test_mm_mask3_fmadd_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #12 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, float %3, float %2 + // X64-NEXT: %7 = insertelement <4 x float> %__Y, float %6, i64 0 + // X64-NEXT: ret <4 x float> %7 return _mm_mask3_fmadd_ss(__W, __X, __Y, __U); } __m128 test_mm_mask3_fmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmadd_round_ss - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: 
%3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, float %3, float %2 + // APPLE-NEXT: %7 = insertelement <4 x float> %__Y, float %6, i64 0 + // APPLE-NEXT: ret <4 x float> %7 + // X64-LABEL: test_mm_mask3_fmadd_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %3 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %2, i32 8) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, float %3, float %2 + // X64-NEXT: %7 = insertelement <4 x float> %__Y, float %6, i64 0 + // X64-NEXT: ret <4 x float> %7 return _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmsub_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %3 = fsub float -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %1, float %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %0 + // APPLE-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask_fmsub_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %3 = fsub float -0.000000e+00, %2 + // X64-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %1, float %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %0 + // X64-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask_fmsub_ss(__W, __U, __A, __B); } __m128 test_mm_fmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float 
@llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %3 = fsub float -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // APPLE-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // APPLE-NEXT: ret <4 x float> %5 + // X64-LABEL: test_mm_fmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %3 = fsub float -0.000000e+00, %2 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // X64-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // X64-NEXT: ret <4 x float> %5 return _mm_fmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %3 = fsub float -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %0 + // APPLE-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask_fmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %3 = fsub float -0.000000e+00, %2 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %0 + // X64-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask_fmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_ss - // CHECK: 
[[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmsub_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %3 = fsub float -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %1, float %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_maskz_fmsub_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %3 = fsub float -0.000000e+00, %2 + // X64-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %1, float %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // X64-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_maskz_fmsub_ss(__U, __A, __B, __C); } __m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %3 = fsub float -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_maskz_fmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = 
extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %3 = fsub float -0.000000e+00, %2 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // X64-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmsub_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: %3 = fsub float -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %1, float %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %2 + // APPLE-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask3_fmsub_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %3 = fsub float -0.000000e+00, %2 + // X64-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %1, float %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %2 + // X64-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask3_fmsub_ss(__W, __X, __Y, __U); } __m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]] - // CHECK-NEXT: 
insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: %3 = fsub float -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %2 + // APPLE-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask3_fmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %3 = fsub float -0.000000e+00, %2 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %1, float %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %2 + // X64-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmadd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %2, float %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %0 + // APPLE-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask_fnmadd_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %2, float %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %0 + // X64-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask_fnmadd_ss(__W, __U, __A, __B); } __m128 
test_mm_fnmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fnmadd_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fnmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %3, i32 8) + // APPLE-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // APPLE-NEXT: ret <4 x float> %5 + // X64-LABEL: test_mm_fnmadd_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %3, i32 8) + // X64-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // X64-NEXT: ret <4 x float> %5 return _mm_fnmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fnmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %0 + // APPLE-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask_fnmadd_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = 
extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %0 + // X64-NEXT: %8 = insertelement <4 x float> %__W, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmadd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %2, float %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_maskz_fnmadd_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %2, float %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // X64-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_maskz_fnmadd_ss(__U, __A, __B, __C); } __m128 test_mm_maskz_fnmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, 
float %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_maskz_fnmadd_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float 0.000000e+00 + // X64-NEXT: %8 = insertelement <4 x float> %__A, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmadd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %2, float %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %3 + // APPLE-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask3_fnmadd_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %4 = tail call float @llvm.fma.f32(float %0, float %2, float %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %3 + // X64-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask3_fnmadd_ss(__W, __X, __Y, __U); } __m128 test_mm_mask3_fnmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = 
call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmadd_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, float %4, float %3 + // APPLE-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // APPLE-NEXT: ret <4 x float> %8 + // X64-LABEL: test_mm_mask3_fnmadd_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %4 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, float %4, float %3 + // X64-NEXT: %8 = insertelement <4 x float> %__Y, float %7, i64 0 + // X64-NEXT: ret <4 x float> %8 return _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmsub_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %4 = fsub float -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call float @llvm.fma.f32(float %0, float %2, float %4) #12 + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, float %5, float %0 + // APPLE-NEXT: %9 = insertelement <4 x float> %__W, float %8, i64 0 + // APPLE-NEXT: ret <4 x float> %9 + // X64-LABEL: test_mm_mask_fnmsub_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: 
%4 = fsub float -0.000000e+00, %3 + // X64-NEXT: %5 = tail call float @llvm.fma.f32(float %0, float %2, float %4) #12 + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, float %5, float %0 + // X64-NEXT: %9 = insertelement <4 x float> %__W, float %8, i64 0 + // X64-NEXT: ret <4 x float> %9 return _mm_mask_fnmsub_ss(__W, __U, __A, __B); } __m128 test_mm_fnmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fnmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fnmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %4 = fsub float -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // APPLE-NEXT: %6 = insertelement <4 x float> %__A, float %5, i64 0 + // APPLE-NEXT: ret <4 x float> %6 + // X64-LABEL: test_mm_fnmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %4 = fsub float -0.000000e+00, %3 + // X64-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // X64-NEXT: %6 = insertelement <4 x float> %__A, float %5, i64 0 + // X64-NEXT: ret <4 x float> %6 return _mm_fnmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fnmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %4 = fsub float -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // APPLE-NEXT: %6 = bitcast i8 %__U to 
<8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, float %5, float %0 + // APPLE-NEXT: %9 = insertelement <4 x float> %__W, float %8, i64 0 + // APPLE-NEXT: ret <4 x float> %9 + // X64-LABEL: test_mm_mask_fnmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %4 = fsub float -0.000000e+00, %3 + // X64-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, float %5, float %0 + // X64-NEXT: %9 = insertelement <4 x float> %__W, float %8, i64 0 + // X64-NEXT: ret <4 x float> %9 return _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmsub_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %4 = fsub float -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call float @llvm.fma.f32(float %0, float %2, float %4) #12 + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, float %5, float 0.000000e+00 + // APPLE-NEXT: %9 = insertelement <4 x float> %__A, float %8, i64 0 + // APPLE-NEXT: ret <4 x float> %9 + // X64-LABEL: test_mm_maskz_fnmsub_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %4 = fsub float -0.000000e+00, %3 + // X64-NEXT: %5 = tail call float @llvm.fma.f32(float %0, float %2, float %4) #12 + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, float %5, float 0.000000e+00 + // X64-NEXT: %9 = insertelement <4 x float> %__A, float %8, i64 0 + // X64-NEXT: ret <4 x float> %9 return _mm_maskz_fnmsub_ss(__U, __A, __B, __C); } __m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement 
<4 x float> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00 - // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // APPLE-NEXT: %4 = fsub float -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, float %5, float 0.000000e+00 + // APPLE-NEXT: %9 = insertelement <4 x float> %__A, float %8, i64 0 + // APPLE-NEXT: ret <4 x float> %9 + // X64-LABEL: test_mm_maskz_fnmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__A, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__B, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__C, i64 0 + // X64-NEXT: %4 = fsub float -0.000000e+00, %3 + // X64-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, float %5, float 0.000000e+00 + // X64-NEXT: %9 = insertelement <4 x float> %__A, float %8, i64 0 + // X64-NEXT: ret <4 x float> %9 return _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <4 x float> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.fma.f32(float [[A]], float [[B]], float [[C]]) - // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmsub_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: %4 = fsub float -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call float @llvm.fma.f32(float %0, float %2, float %4) #12 + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, float %5, 
float %3 + // APPLE-NEXT: %9 = insertelement <4 x float> %__Y, float %8, i64 0 + // APPLE-NEXT: ret <4 x float> %9 + // X64-LABEL: test_mm_mask3_fnmsub_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %4 = fsub float -0.000000e+00, %3 + // X64-NEXT: %5 = tail call float @llvm.fma.f32(float %0, float %2, float %4) #12 + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, float %5, float %3 + // X64-NEXT: %9 = insertelement <4 x float> %__Y, float %8, i64 0 + // X64-NEXT: ret <4 x float> %9 return _mm_mask3_fnmsub_ss(__W, __X, __Y, __U); } __m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <4 x float> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8) - // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]] - // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmsub_round_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // APPLE-NEXT: %2 = fsub float -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // APPLE-NEXT: %4 = fsub float -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, float %5, float %3 + // APPLE-NEXT: %9 = insertelement <4 x float> %__Y, float %8, i64 0 + // APPLE-NEXT: ret <4 x float> %9 + // X64-LABEL: test_mm_mask3_fnmsub_round_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %1 = extractelement <4 x float> %__X, i64 0 + // X64-NEXT: %2 = fsub float -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <4 x float> %__Y, i64 0 + // X64-NEXT: %4 = fsub float -0.000000e+00, %3 + // X64-NEXT: %5 = tail call float @llvm.x86.avx512.vfmadd.f32(float %0, float %2, float %4, i32 8) + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, float %5, float %3 + // X64-NEXT: %9 = insertelement <4 x float> %__Y, float %8, i64 0 + // X64-NEXT: ret <4 x float> %9 return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_sd - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: 
[[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmadd_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #12 + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, double %3, double %0 + // APPLE-NEXT: %7 = insertelement <2 x double> %__W, double %6, i64 0 + // APPLE-NEXT: ret <2 x double> %7 + // X64-LABEL: test_mm_mask_fmadd_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #12 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, double %3, double %0 + // X64-NEXT: %7 = insertelement <2 x double> %__W, double %6, i64 0 + // X64-NEXT: ret <2 x double> %7 return _mm_mask_fmadd_sd(__W, __U, __A, __B); } __m128d test_mm_fmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fmadd_round_sd - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // APPLE-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // APPLE-NEXT: ret <2 x double> %4 + // X64-LABEL: test_mm_fmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // X64-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // X64-NEXT: ret <2 x double> %4 return _mm_fmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_round_sd - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: 
[[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, double %3, double %0 + // APPLE-NEXT: %7 = insertelement <2 x double> %__W, double %6, i64 0 + // APPLE-NEXT: ret <2 x double> %7 + // X64-LABEL: test_mm_mask_fmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, double %3, double %0 + // X64-NEXT: %7 = insertelement <2 x double> %__W, double %6, i64 0 + // X64-NEXT: ret <2 x double> %7 return _mm_mask_fmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_sd - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmadd_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #12 + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, double %3, double 0.000000e+00 + // APPLE-NEXT: %7 = insertelement <2 x double> %__A, double %6, i64 0 + // APPLE-NEXT: ret <2 x double> %7 + // X64-LABEL: test_mm_maskz_fmadd_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #12 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, double %3, double 0.000000e+00 + // X64-NEXT: %7 = insertelement 
<2 x double> %__A, double %6, i64 0 + // X64-NEXT: ret <2 x double> %7 return _mm_maskz_fmadd_sd(__U, __A, __B, __C); } __m128d test_mm_maskz_fmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_round_sd - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, double %3, double 0.000000e+00 + // APPLE-NEXT: %7 = insertelement <2 x double> %__A, double %6, i64 0 + // APPLE-NEXT: ret <2 x double> %7 + // X64-LABEL: test_mm_maskz_fmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, double %3, double 0.000000e+00 + // X64-NEXT: %7 = insertelement <2 x double> %__A, double %6, i64 0 + // X64-NEXT: ret <2 x double> %7 return _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmadd_sd - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmadd_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #12 + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, double %3, double %2 + // APPLE-NEXT: %7 = insertelement <2 x double> %__Y, double %6, i64 0 + // APPLE-NEXT: ret <2 x double> %7 + // X64-LABEL: 
test_mm_mask3_fmadd_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #12 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, double %3, double %2 + // X64-NEXT: %7 = insertelement <2 x double> %__Y, double %6, i64 0 + // X64-NEXT: ret <2 x double> %7 return _mm_mask3_fmadd_sd(__W, __X, __Y, __U); } __m128d test_mm_mask3_fmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmadd_round_sd - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // APPLE-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // APPLE-NEXT: %6 = select i1 %5, double %3, double %2 + // APPLE-NEXT: %7 = insertelement <2 x double> %__Y, double %6, i64 0 + // APPLE-NEXT: ret <2 x double> %7 + // X64-LABEL: test_mm_mask3_fmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %3 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %2, i32 8) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = extractelement <8 x i1> %4, i64 0 + // X64-NEXT: %6 = select i1 %5, double %3, double %2 + // X64-NEXT: %7 = insertelement <2 x double> %__Y, double %6, i64 0 + // X64-NEXT: ret <2 x double> %7 return _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmsub_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x 
double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %3 = fsub double -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %1, double %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %0 + // APPLE-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask_fmsub_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %3 = fsub double -0.000000e+00, %2 + // X64-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %1, double %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %0 + // X64-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask_fmsub_sd(__W, __U, __A, __B); } __m128d test_mm_fmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %3 = fsub double -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // APPLE-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // APPLE-NEXT: ret <2 x double> %5 + // X64-LABEL: test_mm_fmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %3 = fsub double -0.000000e+00, %2 + // X64-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // X64-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // X64-NEXT: ret <2 x double> %5 return _mm_fmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 
- // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %3 = fsub double -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %0 + // APPLE-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask_fmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %3 = fsub double -0.000000e+00, %2 + // X64-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %0 + // X64-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask_fmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmsub_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %3 = fsub double -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %1, double %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_maskz_fmsub_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %3 = fsub double -0.000000e+00, %2 + // X64-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %1, double %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, 
double 0.000000e+00 + // X64-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_maskz_fmsub_sd(__U, __A, __B, __C); } __m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %3 = fsub double -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_maskz_fmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %3 = fsub double -0.000000e+00, %2 + // X64-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double 0.000000e+00 + // X64-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmsub_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %3 = fsub double 
-0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %1, double %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %2 + // APPLE-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask3_fmsub_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %3 = fsub double -0.000000e+00, %2 + // X64-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %1, double %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %2 + // X64-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask3_fmsub_sd(__W, __X, __Y, __U); } __m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %3 = fsub double -0.000000e+00, %2 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %2 + // APPLE-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask3_fmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %3 = fsub double -0.000000e+00, %2 + // X64-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %1, double %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %2 + // X64-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , 
%{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmadd_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %2, double %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %0 + // APPLE-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask_fnmadd_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %2, double %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %0 + // X64-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask_fnmadd_sd(__W, __U, __A, __B); } __m128d test_mm_fnmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fnmadd_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fnmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // APPLE-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // APPLE-NEXT: ret <2 x double> %5 + // X64-LABEL: test_mm_fnmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // X64-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // X64-NEXT: ret <2 x double> %5 return 
_mm_fnmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fnmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %0 + // APPLE-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask_fnmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %0 + // X64-NEXT: %8 = insertelement <2 x double> %__W, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmadd_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %2, double %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = 
extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_maskz_fnmadd_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %2, double %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double 0.000000e+00 + // X64-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_maskz_fnmadd_sd(__U, __A, __B, __C); } __m128d test_mm_maskz_fnmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double 0.000000e+00 + // APPLE-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_maskz_fnmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double 0.000000e+00 + // X64-NEXT: %8 = insertelement <2 x double> %__A, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> 
[[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmadd_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %2, double %3) #12 + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %3 + // APPLE-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask3_fnmadd_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %4 = tail call double @llvm.fma.f64(double %0, double %2, double %3) #12 + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %3 + // X64-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask3_fnmadd_sd(__W, __X, __Y, __U); } __m128d test_mm_mask3_fnmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmadd_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %4 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // APPLE-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // APPLE-NEXT: %7 = select i1 %6, double %4, double %3 + // APPLE-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // APPLE-NEXT: ret <2 x double> %8 + // X64-LABEL: test_mm_mask3_fnmadd_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %4 = tail call double 
@llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %3, i32 8) + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = extractelement <8 x i1> %5, i64 0 + // X64-NEXT: %7 = select i1 %6, double %4, double %3 + // X64-NEXT: %8 = insertelement <2 x double> %__Y, double %7, i64 0 + // X64-NEXT: ret <2 x double> %8 return _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmsub_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %4 = fsub double -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call double @llvm.fma.f64(double %0, double %2, double %4) #12 + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, double %5, double %0 + // APPLE-NEXT: %9 = insertelement <2 x double> %__W, double %8, i64 0 + // APPLE-NEXT: ret <2 x double> %9 + // X64-LABEL: test_mm_mask_fnmsub_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %4 = fsub double -0.000000e+00, %3 + // X64-NEXT: %5 = tail call double @llvm.fma.f64(double %0, double %2, double %4) #12 + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, double %5, double %0 + // X64-NEXT: %9 = insertelement <2 x double> %__W, double %8, i64 0 + // X64-NEXT: ret <2 x double> %9 return _mm_mask_fnmsub_sd(__W, __U, __A, __B); } __m128d test_mm_fnmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fnmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0 + // APPLE-LABEL: test_mm_fnmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = fsub double 
-0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %4 = fsub double -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // APPLE-NEXT: %6 = insertelement <2 x double> %__A, double %5, i64 0 + // APPLE-NEXT: ret <2 x double> %6 + // X64-LABEL: test_mm_fnmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %4 = fsub double -0.000000e+00, %3 + // X64-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // X64-NEXT: %6 = insertelement <2 x double> %__A, double %5, i64 0 + // X64-NEXT: ret <2 x double> %6 return _mm_fnmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_fnmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_fnmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %4 = fsub double -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, double %5, double %0 + // APPLE-NEXT: %9 = insertelement <2 x double> %__W, double %8, i64 0 + // APPLE-NEXT: ret <2 x double> %9 + // X64-LABEL: test_mm_mask_fnmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %4 = fsub double -0.000000e+00, %3 + // X64-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, double %5, double %0 + // X64-NEXT: %9 = insertelement <2 x double> %__W, double %8, i64 0 + // X64-NEXT: ret <2 x double> %9 return _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , 
%{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmsub_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %4 = fsub double -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call double @llvm.fma.f64(double %0, double %2, double %4) #12 + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, double %5, double 0.000000e+00 + // APPLE-NEXT: %9 = insertelement <2 x double> %__A, double %8, i64 0 + // APPLE-NEXT: ret <2 x double> %9 + // X64-LABEL: test_mm_maskz_fnmsub_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %4 = fsub double -0.000000e+00, %3 + // X64-NEXT: %5 = tail call double @llvm.fma.f64(double %0, double %2, double %4) #12 + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, double %5, double 0.000000e+00 + // X64-NEXT: %9 = insertelement <2 x double> %__A, double %8, i64 0 + // X64-NEXT: ret <2 x double> %9 return _mm_maskz_fnmsub_sd(__U, __A, __B, __C); } __m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00 - // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_fnmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // APPLE-NEXT: %4 = fsub double -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // 
APPLE-NEXT: %8 = select i1 %7, double %5, double 0.000000e+00 + // APPLE-NEXT: %9 = insertelement <2 x double> %__A, double %8, i64 0 + // APPLE-NEXT: ret <2 x double> %9 + // X64-LABEL: test_mm_maskz_fnmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__A, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__B, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__C, i64 0 + // X64-NEXT: %4 = fsub double -0.000000e+00, %3 + // X64-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, double %5, double 0.000000e+00 + // X64-NEXT: %9 = insertelement <2 x double> %__A, double %8, i64 0 + // X64-NEXT: ret <2 x double> %9 return _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[NEG2:%.+]] = fsub <2 x double> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.fma.f64(double [[A]], double [[B]], double [[C]]) - // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmsub_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %4 = fsub double -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call double @llvm.fma.f64(double %0, double %2, double %4) #12 + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, double %5, double %3 + // APPLE-NEXT: %9 = insertelement <2 x double> %__Y, double %8, i64 0 + // APPLE-NEXT: ret <2 x double> %9 + // X64-LABEL: test_mm_mask3_fnmsub_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %4 = fsub double -0.000000e+00, %3 + // X64-NEXT: %5 = tail call double @llvm.fma.f64(double %0, double %2, double %4) #12 + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, double %5, double %3 + // X64-NEXT: %9 = insertelement <2 x double> %__Y, double %8, i64 0 + // X64-NEXT: ret <2 x double> %9 return _mm_mask3_fnmsub_sd(__W, __X, __Y, __U); } __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.*}} - // CHECK: [[NEG2:%.+]] 
= fsub <2 x double> , [[ORIGC:%.+]] - // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8) - // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]] - // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask3_fnmsub_round_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // APPLE-NEXT: %2 = fsub double -0.000000e+00, %1 + // APPLE-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // APPLE-NEXT: %4 = fsub double -0.000000e+00, %3 + // APPLE-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // APPLE-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // APPLE-NEXT: %8 = select i1 %7, double %5, double %3 + // APPLE-NEXT: %9 = insertelement <2 x double> %__Y, double %8, i64 0 + // APPLE-NEXT: ret <2 x double> %9 + // X64-LABEL: test_mm_mask3_fnmsub_round_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %1 = extractelement <2 x double> %__X, i64 0 + // X64-NEXT: %2 = fsub double -0.000000e+00, %1 + // X64-NEXT: %3 = extractelement <2 x double> %__Y, i64 0 + // X64-NEXT: %4 = fsub double -0.000000e+00, %3 + // X64-NEXT: %5 = tail call double @llvm.x86.avx512.vfmadd.f64(double %0, double %2, double %4, i32 8) + // X64-NEXT: %6 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %7 = extractelement <8 x i1> %6, i64 0 + // X64-NEXT: %8 = select i1 %7, double %5, double %3 + // X64-NEXT: %9 = insertelement <2 x double> %__Y, double %8, i64 0 + // X64-NEXT: ret <2 x double> %9 return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_permutex_pd(__m512d __X) { - // CHECK-LABEL: @test_mm512_permutex_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + // APPLE-LABEL: test_mm512_permutex_pd + // APPLE: entry: + // APPLE-NEXT: %perm = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // APPLE-NEXT: ret <8 x double> %perm + // X64-LABEL: test_mm512_permutex_pd + // X64: entry: + // X64-NEXT: %perm = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // X64-NEXT: ret <8 x double> %perm return _mm512_permutex_pd(__X, 0); } __m512d test_mm512_mask_permutex_pd(__m512d __W, __mmask8 __U, __m512d __X) { - // CHECK-LABEL: @test_mm512_mask_permutex_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutex_pd + // APPLE: entry: + // APPLE-NEXT: %perm = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %perm, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_permutex_pd + // X64: entry: + // X64-NEXT: %perm = shufflevector <8 x double> %__X, <8 x 
double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %perm, <8 x double> %__W
+ // X64-NEXT: ret <8 x double> %1
  return _mm512_mask_permutex_pd(__W, __U, __X, 0);
}

__m512d test_mm512_maskz_permutex_pd(__mmask8 __U, __m512d __X) {
- // CHECK-LABEL: @test_mm512_maskz_permutex_pd
- // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // APPLE-LABEL: test_mm512_maskz_permutex_pd
+ // APPLE: entry:
+ // APPLE-NEXT: %perm = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1>
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %perm, <8 x double> zeroinitializer
+ // APPLE-NEXT: ret <8 x double> %1
+ // X64-LABEL: test_mm512_maskz_permutex_pd
+ // X64: entry:
+ // X64-NEXT: %perm = shufflevector <8 x double> %__X, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %perm, <8 x double> zeroinitializer
+ // X64-NEXT: ret <8 x double> %1
  return _mm512_maskz_permutex_pd(__U, __X, 0);
}

__m512i test_mm512_permutex_epi64(__m512i __X) {
- // CHECK-LABEL: @test_mm512_permutex_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // APPLE-LABEL: test_mm512_permutex_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %perm = shufflevector <8 x i64> %__X, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // APPLE-NEXT: ret <8 x i64> %perm
+ // X64-LABEL: test_mm512_permutex_epi64
+ // X64: entry:
+ // X64-NEXT: %perm = shufflevector <8 x i64> %__X, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // X64-NEXT: ret <8 x i64> %perm
  return _mm512_permutex_epi64(__X, 0);
}

__m512i test_mm512_mask_permutex_epi64(__m512i __W, __mmask8 __M, __m512i __X) {
- // CHECK-LABEL: @test_mm512_mask_permutex_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
- // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ // APPLE-LABEL: test_mm512_mask_permutex_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %perm = shufflevector <8 x i64> %__X, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %perm, <8 x i64> %__W
+ // APPLE-NEXT: ret <8 x i64> %1
+ // X64-LABEL: test_mm512_mask_permutex_epi64
+ // X64: entry:
+ // X64-NEXT: %perm = shufflevector <8 x i64> %__X, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %perm, <8 x i64> %__W
+ // X64-NEXT: ret <8 x i64> %1
  return _mm512_mask_permutex_epi64(__W, __M, __X, 0);
}

__m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) {
- // CHECK-LABEL: @test_mm512_maskz_permutex_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
- // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ // APPLE-LABEL: test_mm512_maskz_permutex_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %perm = shufflevector <8 x i64> %__X, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %perm, <8 x i64> zeroinitializer
+ // APPLE-NEXT: ret <8 x i64> %1
+ // X64-LABEL: test_mm512_maskz_permutex_epi64
+ // X64: entry:
+ // X64-NEXT: %perm = shufflevector <8 x i64> %__X, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %perm, <8 x i64> zeroinitializer
+ // X64-NEXT: ret <8 x i64> %1
  return _mm512_maskz_permutex_epi64(__M, __X, 0);
}

__m512d test_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
- // CHECK-LABEL: @test_mm512_permutexvar_pd
- // CHECK: @llvm.x86.avx512.permvar.df.512
+ // APPLE-LABEL: test_mm512_permutexvar_pd
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %__Y, <8 x i64> %__X) #12
+ // APPLE-NEXT: ret <8 x double> %0
+ // X64-LABEL: test_mm512_permutexvar_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %__Y, <8 x i64> %__X) #12
+ // X64-NEXT: ret <8 x double> %0
  return _mm512_permutexvar_pd(__X, __Y);
}

__m512d test_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) {
- // CHECK-LABEL: @test_mm512_mask_permutexvar_pd
- // CHECK: @llvm.x86.avx512.permvar.df.512
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // APPLE-LABEL: test_mm512_mask_permutexvar_pd
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %__Y, <8 x i64> %__X) #12
+ // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W
+ // APPLE-NEXT: ret <8 x double> %2
+ // X64-LABEL: test_mm512_mask_permutexvar_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %__Y, <8 x i64> %__X) #12
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W
+ // X64-NEXT: ret <8 x double> %2
  return _mm512_mask_permutexvar_pd(__W, __U, __X, __Y);
}

__m512d test_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
- // CHECK-LABEL: @test_mm512_maskz_permutexvar_pd
- // CHECK: @llvm.x86.avx512.permvar.df.512
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // APPLE-LABEL: test_mm512_maskz_permutexvar_pd
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %__Y, <8 x i64> %__X) #12
+ // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
+ // APPLE-NEXT: ret <8 x double> %2
+ // X64-LABEL: test_mm512_maskz_permutexvar_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %__Y, <8 x i64> %__X) #12
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
+ // X64-NEXT: ret <8 x double> %2
  return _mm512_maskz_permutexvar_pd(__U, __X, __Y);
}

__m512i test_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
- // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi64
- // CHECK: @llvm.x86.avx512.permvar.di.512
- // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ // APPLE-LABEL: test_mm512_maskz_permutexvar_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %__Y, <8 x i64> %__X) #12
+ // APPLE-NEXT: %1 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
+ // APPLE-NEXT: ret <8 x i64> %2
+ // X64-LABEL: test_mm512_maskz_permutexvar_epi64
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %__Y, <8 x i64> %__X) #12
+ // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
+ // X64-NEXT: ret <8 x i64> %2
  return _mm512_maskz_permutexvar_epi64(__M, __X, __Y);
}

__m512i test_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
- // CHECK-LABEL: @test_mm512_permutexvar_epi64
- // CHECK: @llvm.x86.avx512.permvar.di.512
+ // APPLE-LABEL: test_mm512_permutexvar_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %__Y, <8 x i64> %__X) #12
+ // APPLE-NEXT: ret <8 x i64> %0
+ // X64-LABEL: test_mm512_permutexvar_epi64
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %__Y, <8 x i64> %__X) #12
+ // X64-NEXT: ret <8 x i64> %0
  return _mm512_permutexvar_epi64(__X, __Y);
}

__m512i test_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
- // CHECK-LABEL: @test_mm512_mask_permutexvar_epi64
- // CHECK: @llvm.x86.avx512.permvar.di.512
- // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ // APPLE-LABEL: test_mm512_mask_permutexvar_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %__Y, <8 x i64> %__X) #12
+ // APPLE-NEXT: %1 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W
+ // APPLE-NEXT: ret <8 x i64> %2
+ // X64-LABEL: test_mm512_mask_permutexvar_epi64
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %__Y, <8 x i64> %__X) #12
+ // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W
+ // X64-NEXT: ret <8 x i64> %2
  return _mm512_mask_permutexvar_epi64(__W, __M, __X, __Y);
}

__m512 test_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
- // CHECK-LABEL: @test_mm512_permutexvar_ps
- // CHECK: @llvm.x86.avx512.permvar.sf.512
+ // APPLE-LABEL: test_mm512_permutexvar_ps
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32>
+ // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %__Y, <16 x i32> %0) #12
+ // APPLE-NEXT: ret <16 x float> %1
+ // X64-LABEL: test_mm512_permutexvar_ps
+ // X64: entry:
+ // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32>
+ // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %__Y, <16 x i32> %0) #12
+ // X64-NEXT: ret <16 x float> %1
  return _mm512_permutexvar_ps(__X, __Y);
}

__m512 test_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
- // CHECK-LABEL: @test_mm512_mask_permutexvar_ps
- // CHECK: @llvm.x86.avx512.permvar.sf.512
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // APPLE-LABEL: test_mm512_mask_permutexvar_ps
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32>
+ // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %__Y, <16 x i32> %0) #12
+ // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1>
+ // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W
+ // APPLE-NEXT: ret <16 x float> %3
+ // X64-LABEL: test_mm512_mask_permutexvar_ps
+ // X64: entry:
+ // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32>
+ // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %__Y, <16 x i32> %0) #12
+ // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1>
+ // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %__W
+ 
// X64-NEXT: ret <16 x float> %3 return _mm512_mask_permutexvar_ps(__W, __U, __X, __Y); } __m512 test_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) { - // CHECK-LABEL: @test_mm512_maskz_permutexvar_ps - // CHECK: @llvm.x86.avx512.permvar.sf.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutexvar_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %__Y, <16 x i32> %0) #12 + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %3 + // X64-LABEL: test_mm512_maskz_permutexvar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %__Y, <16 x i32> %0) #12 + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %3 return _mm512_maskz_permutexvar_ps(__U, __X, __Y); } __m512i test_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi32 - // CHECK: @llvm.x86.avx512.permvar.si.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_permutexvar_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // APPLE-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %5 + // X64-LABEL: test_mm512_maskz_permutexvar_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <16 x i32> %4 to <8 x i64> + // X64-NEXT: ret <8 x i64> %5 return _mm512_maskz_permutexvar_epi32(__M, __X, __Y); } __m512i test_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_permutexvar_epi32 - // CHECK: @llvm.x86.avx512.permvar.si.512 + // APPLE-LABEL: test_mm512_permutexvar_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_permutexvar_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_permutexvar_epi32(__X, __Y); } __m512i 
test_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_permutexvar_epi32 - // CHECK: @llvm.x86.avx512.permvar.si.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_permutexvar_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__Y to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__X to <16 x i32> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %0, <16 x i32> %1) #12 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_mask_permutexvar_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__Y to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__X to <16 x i32> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %0, <16 x i32> %1) #12 + // X64-NEXT: %3 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y); } __mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kand - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_mm512_kand + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = and <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %8, %9 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_mm512_kand + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // X64-NEXT: %8 = and <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %8, %9 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_mm512_kand(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kandn - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> 
- // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], - // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_mm512_kandn + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp eq <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = and <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %8, %9 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_mm512_kandn + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp eq <16 x i32> %5, %6 + // X64-NEXT: %8 = and <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %8, %9 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kor - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_mm512_kor + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = or <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %9, %8 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_mm512_kor + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // X64-NEXT: %8 = or <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %9, %8 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return 
_mm512_mask_cmpneq_epu32_mask(_mm512_kor(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_mm512_kortestc - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16 - // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1 - // CHECK: zext i1 [[CMP]] to i32 + // APPLE-LABEL: test_mm512_kortestc + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %4 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // APPLE-NEXT: %6 = or <16 x i1> %5, %2 + // APPLE-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // APPLE-NEXT: %8 = icmp eq i16 %7, -1 + // APPLE-NEXT: %9 = zext i1 %8 to i32 + // APPLE-NEXT: ret i32 %9 + // X64-LABEL: test_mm512_kortestc + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %4 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // X64-NEXT: %6 = or <16 x i1> %2, %5 + // X64-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // X64-NEXT: %8 = icmp eq i16 %7, -1 + // X64-NEXT: %9 = zext i1 %8 to i32 + // X64-NEXT: ret i32 %9 return _mm512_kortestc(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)); } int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_mm512_kortestz - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16 - // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0 - // CHECK: zext i1 [[CMP]] to i32 + // APPLE-LABEL: test_mm512_kortestz + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %4 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // APPLE-NEXT: %6 = or <16 x i1> %5, %2 + // APPLE-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // APPLE-NEXT: %8 = icmp eq i16 %7, 0 + // APPLE-NEXT: %9 = zext i1 %8 to i32 + // APPLE-NEXT: ret i32 %9 + // X64-LABEL: test_mm512_kortestz + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %4 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // X64-NEXT: %6 = or <16 x i1> %2, %5 + // X64-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // X64-NEXT: %8 = icmp eq i16 %7, 0 + // X64-NEXT: %9 = zext i1 %8 to i32 + // X64-NEXT: ret i32 %9 return _mm512_kortestz(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)); } unsigned char test_kortestz_mask16_u8(__m512i __A, 
__m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_kortestz_mask16_u8 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16 - // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0 - // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 - // CHECK: trunc i32 [[ZEXT]] to i8 + // APPLE-LABEL: test_kortestz_mask16_u8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %4 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // APPLE-NEXT: %6 = or <16 x i1> %5, %2 + // APPLE-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // APPLE-NEXT: %8 = icmp eq i16 %7, 0 + // APPLE-NEXT: %conv.i = zext i1 %8 to i8 + // APPLE-NEXT: ret i8 %conv.i + // X64-LABEL: test_kortestz_mask16_u8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %4 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // X64-NEXT: %6 = or <16 x i1> %2, %5 + // X64-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // X64-NEXT: %8 = icmp eq i16 %7, 0 + // X64-NEXT: %conv.i = zext i1 %8 to i8 + // X64-NEXT: ret i8 %conv.i return _kortestz_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)); } unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_kortestc_mask16_u8 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16 - // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1 - // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 - // CHECK: trunc i32 [[ZEXT]] to i8 + // APPLE-LABEL: test_kortestc_mask16_u8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %4 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // APPLE-NEXT: %6 = or <16 x i1> %5, %2 + // APPLE-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // APPLE-NEXT: %8 = icmp eq i16 %7, -1 + // APPLE-NEXT: %conv.i = zext i1 %8 to i8 + // APPLE-NEXT: ret i8 %conv.i + // X64-LABEL: test_kortestc_mask16_u8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %4 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // X64-NEXT: %6 = or <16 x i1> %2, %5 + // X64-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // X64-NEXT: %8 = icmp eq i16 %7, -1 + // X64-NEXT: %conv.i = zext i1 %8 to i8 + // X64-NEXT: ret i8 %conv.i return _kortestc_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)); } unsigned char 
test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) { - // CHECK-LABEL: @test_kortest_mask16_u8 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16 - // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1 - // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 - // CHECK: trunc i32 [[ZEXT]] to i8 - // CHECK: [[LHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[OR2:%.*]] = or <16 x i1> [[LHS2]], [[RHS2]] - // CHECK: [[CAST2:%.*]] = bitcast <16 x i1> [[OR2]] to i16 - // CHECK: [[CMP2:%.*]] = icmp eq i16 [[CAST2]], 0 - // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32 - // CHECK: trunc i32 [[ZEXT2]] to i8 + // APPLE-LABEL: test_kortest_mask16_u8 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %4 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // APPLE-NEXT: %6 = or <16 x i1> %5, %2 + // APPLE-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // APPLE-NEXT: %8 = icmp eq i16 %7, -1 + // APPLE-NEXT: %conv.i = zext i1 %8 to i8 + // APPLE-NEXT: store i8 %conv.i, i8* %CF, align 1, !tbaa !2 + // APPLE-NEXT: %9 = icmp eq i16 %7, 0 + // APPLE-NEXT: %conv1.i = zext i1 %9 to i8 + // APPLE-NEXT: ret i8 %conv1.i + // X64-LABEL: test_kortest_mask16_u8 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %4 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %5 = icmp ne <16 x i32> %3, %4 + // X64-NEXT: %6 = or <16 x i1> %2, %5 + // X64-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // X64-NEXT: %8 = icmp eq i16 %7, -1 + // X64-NEXT: %conv.i = zext i1 %8 to i8 + // X64-NEXT: store i8 %conv.i, i8* %CF, align 1, !tbaa !2 + // X64-NEXT: %9 = icmp eq i16 %7, 0 + // X64-NEXT: %conv1.i = zext i1 %9 to i8 + // X64-NEXT: ret i8 %conv1.i return _kortest_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D), CF); } __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kunpackb - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[LHS2:%.*]] = shufflevector <16 x i1> [[LHS]], <16 x i1> [[LHS]], <8 x i32> - // CHECK: [[RHS2:%.*]] = shufflevector <16 x i1> [[RHS]], <16 x i1> [[RHS]], <8 x i32> - // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[RHS2]], <8 x i1> [[LHS2]], <16 x i32> - // CHECK: bitcast <16 x i1> [[CONCAT]] to i16 + // APPLE-LABEL: test_mm512_kunpackb + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + 
// APPLE-NEXT: %8 = shufflevector <16 x i1> %4, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // APPLE-NEXT: %9 = shufflevector <16 x i1> %7, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // APPLE-NEXT: %10 = shufflevector <8 x i1> %9, <8 x i1> %8, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // APPLE-NEXT: %11 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %12 = and <16 x i1> %11, %10 + // APPLE-NEXT: %13 = bitcast <16 x i1> %12 to i16 + // APPLE-NEXT: ret i16 %13 + // X64-LABEL: test_mm512_kunpackb + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // X64-NEXT: %8 = shufflevector <16 x i1> %7, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // X64-NEXT: %9 = shufflevector <16 x i1> %4, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // X64-NEXT: %10 = shufflevector <8 x i1> %9, <8 x i1> %8, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // X64-NEXT: %11 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %12 = and <16 x i1> %11, %10 + // X64-NEXT: %13 = bitcast <16 x i1> %12 to i16 + // X64-NEXT: ret i16 %13 return _mm512_mask_cmpneq_epu32_mask(_mm512_kunpackb(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kxnor - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], - // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_mm512_kxnor + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp eq <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = xor <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %9, %8 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_mm512_kxnor + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp eq <16 x i32> %5, %6 + // X64-NEXT: %8 = xor <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %9, %8 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kxor - // CHECK: [[LHS:%.*]] = bitcast i16 
%{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_mm512_kxor + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = xor <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %9, %8 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_mm512_kxor + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // X64-NEXT: %8 = xor <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %9, %8 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_mm512_kxor(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_knot_mask16(__mmask16 a) { - // CHECK-LABEL: @test_knot_mask16 - // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], - // CHECK: bitcast <16 x i1> [[NOT]] to i16 + // APPLE-LABEL: test_knot_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %a to <16 x i1> + // APPLE-NEXT: %1 = xor <16 x i1> %0, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> + // APPLE-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // APPLE-NEXT: ret i16 %2 + // X64-LABEL: test_knot_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %a to <16 x i1> + // X64-NEXT: %1 = xor <16 x i1> %0, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> + // X64-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // X64-NEXT: ret i16 %2 return _knot_mask16(a); } __mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kand_mask16 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_kand_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = and <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %8, %9 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_kand_mask16 + // X64: entry: + // 
X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // X64-NEXT: %8 = and <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %8, %9 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_kand_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kandn_mask16 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], - // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_kandn_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp eq <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = and <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %8, %9 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_kandn_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp eq <16 x i32> %5, %6 + // X64-NEXT: %8 = and <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %8, %9 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_kandn_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kor_mask16 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_kor_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // 
APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = or <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %9, %8 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_kor_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // X64-NEXT: %8 = or <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %9, %8 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_kor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kxnor_mask16 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], - // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_kxnor_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp eq <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = xor <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %9, %8 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_kxnor_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp eq <16 x i32> %5, %6 + // X64-NEXT: %8 = xor <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %9, %8 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_kxnor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kxor_mask16 - // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_kxor_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x 
i64> %__F to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %5 = bitcast <8 x i64> %__C to <16 x i32> + // APPLE-NEXT: %6 = bitcast <8 x i64> %__D to <16 x i32> + // APPLE-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // APPLE-NEXT: %8 = xor <16 x i1> %7, %4 + // APPLE-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %10 = and <16 x i1> %9, %8 + // APPLE-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // APPLE-NEXT: ret i16 %11 + // X64-LABEL: test_kxor_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__E to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__F to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %__C to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %__D to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %6 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %7 = icmp ne <16 x i32> %5, %6 + // X64-NEXT: %8 = xor <16 x i1> %4, %7 + // X64-NEXT: %9 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %10 = and <16 x i1> %9, %8 + // X64-NEXT: %11 = bitcast <16 x i1> %10 to i16 + // X64-NEXT: ret i16 %11 return _mm512_mask_cmpneq_epu32_mask(_kxor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kshiftli_mask16(__m512i A, __m512i B, __m512i C, __m512i D) { - // CHECK-LABEL: @test_kshiftli_mask16 - // CHECK: [[VAL:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = shufflevector <16 x i1> zeroinitializer, <16 x i1> [[VAL]], <16 x i32> - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_kshiftli_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %C to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %D to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> %B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %kshiftl = shufflevector <16 x i1> , <16 x i1> %4, <16 x i32> + // APPLE-NEXT: %5 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %6 = and <16 x i1> %5, %kshiftl + // APPLE-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // APPLE-NEXT: ret i16 %7 + // X64-LABEL: test_kshiftli_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %C to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %D to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %A to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %B to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %kshiftl = shufflevector <16 x i1> , <16 x i1> %4, <16 x i32> + // X64-NEXT: %5 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %6 = and <16 x i1> %5, %kshiftl + // X64-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // X64-NEXT: ret i16 %7 return _mm512_mask_cmpneq_epu32_mask(_kshiftli_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D); } __mmask16 test_kshiftri_mask16(__m512i A, __m512i B, __m512i C, __m512i D) { - // CHECK-LABEL: @test_kshiftri_mask16 - // CHECK: [[VAL:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[RES:%.*]] = shufflevector <16 x i1> [[VAL]], <16 x i1> zeroinitializer, <16 x i32> - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // APPLE-LABEL: test_kshiftri_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %C to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %D to <16 x i32> + // APPLE-NEXT: %2 = bitcast <8 x i64> %A to <16 x i32> + // APPLE-NEXT: %3 = bitcast <8 x i64> 
%B to <16 x i32> + // APPLE-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // APPLE-NEXT: %kshiftr = shufflevector <16 x i1> %4, <16 x i1> , <16 x i32> + // APPLE-NEXT: %5 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %6 = and <16 x i1> %5, %kshiftr + // APPLE-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // APPLE-NEXT: ret i16 %7 + // X64-LABEL: test_kshiftri_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %C to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %D to <16 x i32> + // X64-NEXT: %2 = bitcast <8 x i64> %A to <16 x i32> + // X64-NEXT: %3 = bitcast <8 x i64> %B to <16 x i32> + // X64-NEXT: %4 = icmp ne <16 x i32> %2, %3 + // X64-NEXT: %kshiftr = shufflevector <16 x i1> %4, <16 x i1> , <16 x i32> + // X64-NEXT: %5 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %6 = and <16 x i1> %5, %kshiftr + // X64-NEXT: %7 = bitcast <16 x i1> %6 to i16 + // X64-NEXT: ret i16 %7 return _mm512_mask_cmpneq_epu32_mask(_kshiftri_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D); } unsigned int test_cvtmask16_u32(__m512i A, __m512i B) { - // CHECK-LABEL: @test_cvtmask16_u32 - // CHECK: bitcast <16 x i1> %{{.*}} to i16 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: zext i16 %{{.*}} to i32 + // APPLE-LABEL: test_cvtmask16_u32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %B to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // APPLE-NEXT: %conv.i = zext i16 %3 to i32 + // APPLE-NEXT: ret i32 %conv.i + // X64-LABEL: test_cvtmask16_u32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %B to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // X64-NEXT: %conv.i = zext i16 %3 to i32 + // X64-NEXT: ret i32 %conv.i return _cvtmask16_u32(_mm512_cmpneq_epu32_mask(A, B)); } __mmask16 test_cvtu32_mask16(__m512i A, __m512i B, unsigned int C) { - // CHECK-LABEL: @test_cvtu32_mask16 - // CHECK: trunc i32 %{{.*}} to i16 - // CHECK: bitcast i16 %{{.*}} to <16 x i1> + // APPLE-LABEL: test_cvtu32_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %B to <16 x i32> + // APPLE-NEXT: %conv.i = trunc i32 %C to i16 + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16 %conv.i to <16 x i1> + // APPLE-NEXT: %4 = and <16 x i1> %2, %3 + // APPLE-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // APPLE-NEXT: ret i16 %5 + // X64-LABEL: test_cvtu32_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %B to <16 x i32> + // X64-NEXT: %conv.i = trunc i32 %C to i16 + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16 %conv.i to <16 x i1> + // X64-NEXT: %4 = and <16 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // X64-NEXT: ret i16 %5 return _mm512_mask_cmpneq_epu32_mask(_cvtu32_mask16(C), A, B); } __mmask16 test_load_mask16(__mmask16 *A, __m512i B, __m512i C) { - // CHECK-LABEL: @test_load_mask16 - // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}} - // CHECK: bitcast i16 [[LOAD]] to <16 x i1> + // APPLE-LABEL: test_load_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %B to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %C to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16* %A to <16 x i1>* + // APPLE-NEXT: %3 = load <16 x i1>, <16 x i1>* %2, align 2, !tbaa !5 + // 
APPLE-NEXT: %4 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %5 = and <16 x i1> %3, %4 + // APPLE-NEXT: %6 = bitcast <16 x i1> %5 to i16 + // APPLE-NEXT: ret i16 %6 + // X64-LABEL: test_load_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %B to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %C to <16 x i32> + // X64-NEXT: %2 = bitcast i16* %A to <16 x i1>* + // X64-NEXT: %3 = load <16 x i1>, <16 x i1>* %2, align 2, !tbaa !5 + // X64-NEXT: %4 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %5 = and <16 x i1> %3, %4 + // X64-NEXT: %6 = bitcast <16 x i1> %5 to i16 + // X64-NEXT: ret i16 %6 return _mm512_mask_cmpneq_epu32_mask(_load_mask16(A), B, C); } void test_store_mask16(__mmask16 *A, __m512i B, __m512i C) { - // CHECK-LABEL: @test_store_mask16 - // CHECK: bitcast <16 x i1> %{{.*}} to i16 - // CHECK: store i16 %{{.*}}, i16* %{{.*}} + // APPLE-LABEL: test_store_mask16 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %B to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %C to <16 x i32> + // APPLE-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // APPLE-NEXT: %3 = bitcast i16* %A to <16 x i1>* + // APPLE-NEXT: store <16 x i1> %2, <16 x i1>* %3, align 2, !tbaa !5 + // APPLE-NEXT: ret void + // X64-LABEL: test_store_mask16 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %B to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %C to <16 x i32> + // X64-NEXT: %2 = icmp ne <16 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i16* %A to <16 x i1>* + // X64-NEXT: store <16 x i1> %2, <16 x i1>* %3, align 2, !tbaa !5 + // X64-NEXT: ret void _store_mask16(A, _mm512_cmpneq_epu32_mask(B, C)); } void test_mm512_stream_si512(__m512i * __P, __m512i __A) { - // CHECK-LABEL: @test_mm512_stream_si512 - // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal + // APPLE-LABEL: test_mm512_stream_si512 + // APPLE: entry: + // APPLE-NEXT: store <8 x i64> %__A, <8 x i64>* %__P, align 64, !tbaa !2, !nontemporal !7 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_stream_si512 + // X64: entry: + // X64-NEXT: store <8 x i64> %__A, <8 x i64>* %__P, align 64, !tbaa !2, !nontemporal !7 + // X64-NEXT: ret void _mm512_stream_si512(__P, __A); } __m512i test_mm512_stream_load_si512(void *__P) { - // CHECK-LABEL: @test_mm512_stream_load_si512 - // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal + // APPLE-LABEL: test_mm512_stream_load_si512 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2, !nontemporal !7 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_stream_load_si512 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2, !nontemporal !7 + // X64-NEXT: ret <8 x i64> %1 return _mm512_stream_load_si512(__P); } __m512i test_mm512_stream_load_si512_const(void const *__P) { - // CHECK-LABEL: @test_mm512_stream_load_si512_const - // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal + // APPLE-LABEL: test_mm512_stream_load_si512_const + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // APPLE-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2, !nontemporal !7 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_stream_load_si512_const + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i64>* + // X64-NEXT: %1 = load <8 x i64>, <8 x i64>* %0, align 64, !tbaa !2, !nontemporal !7 + // X64-NEXT: ret <8 x i64> %1 return 
_mm512_stream_load_si512(__P); } void test_mm512_stream_pd(double *__P, __m512d __A) { - // CHECK-LABEL: @test_mm512_stream_pd - // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, !nontemporal + // APPLE-LABEL: test_mm512_stream_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast double* %__P to <8 x double>* + // APPLE-NEXT: store <8 x double> %__A, <8 x double>* %0, align 64, !tbaa !2, !nontemporal !7 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_stream_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__P to <8 x double>* + // X64-NEXT: store <8 x double> %__A, <8 x double>* %0, align 64, !tbaa !2, !nontemporal !7 + // X64-NEXT: ret void return _mm512_stream_pd(__P, __A); } void test_mm512_stream_ps(float *__P, __m512 __A) { - // CHECK-LABEL: @test_mm512_stream_ps - // CHECK: store <16 x float> %{{.*}}, <16 x float>* %{{.*}}, align 64, !nontemporal + // APPLE-LABEL: test_mm512_stream_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast float* %__P to <16 x float>* + // APPLE-NEXT: store <16 x float> %__A, <16 x float>* %0, align 64, !tbaa !2, !nontemporal !7 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_stream_ps + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__P to <16 x float>* + // X64-NEXT: store <16 x float> %__A, <16 x float>* %0, align 64, !tbaa !2, !nontemporal !7 + // X64-NEXT: ret void _mm512_stream_ps(__P, __A); } __m512d test_mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_compress_pd - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_mask_compress_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %__A, <8 x double> %__W, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_compress_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %__A, <8 x double> %__W, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_compress_pd(__W, __U, __A); } __m512d test_mm512_maskz_compress_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_pd - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_maskz_compress_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %__A, <8 x double> zeroinitializer, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_compress_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %__A, <8 x double> zeroinitializer, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_compress_pd(__U, __A); } __m512i test_mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compress_epi64 - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_mask_compress_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %__A, <8 x i64> %__W, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_compress_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // 
X64-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %__A, <8 x i64> %__W, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_compress_epi64(__W, __U, __A); } __m512i test_mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_epi64 - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_maskz_compress_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %__A, <8 x i64> zeroinitializer, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_compress_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %__A, <8 x i64> zeroinitializer, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_compress_epi64(__U, __A); } __m512 test_mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_compress_ps - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_mask_compress_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %__A, <16 x float> %__W, <16 x i1> %0) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_compress_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %__A, <16 x float> %__W, <16 x i1> %0) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_compress_ps(__W, __U, __A); } __m512 test_mm512_maskz_compress_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_ps - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_maskz_compress_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %__A, <16 x float> zeroinitializer, <16 x i1> %0) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_compress_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %__A, <16 x float> zeroinitializer, <16 x i1> %0) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_compress_ps(__U, __A); } __m512i test_mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compress_epi32 - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_mask_compress_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i1> %2) #12 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_compress_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %0, <16 x i32> %1, 
<16 x i1> %2) #12 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_compress_epi32(__W, __U, __A); } __m512i test_mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_epi32 - // CHECK: @llvm.x86.avx512.mask.compress + // APPLE-LABEL: test_mm512_maskz_compress_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %0, <16 x i32> zeroinitializer, <16 x i1> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_compress_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %0, <16 x i32> zeroinitializer, <16 x i1> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_compress_epi32(__U, __A); } __mmask8 test_mm_cmp_round_ss_mask(__m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_cmp_round_ss_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_cmp_round_ss_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %__X, <4 x float> %__Y, i32 5, i8 -1, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_cmp_round_ss_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %__X, <4 x float> %__Y, i32 5, i8 -1, i32 4) + // X64-NEXT: ret i8 %0 return _mm_cmp_round_ss_mask(__X, __Y, 5, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm_mask_cmp_round_ss_mask(__mmask8 __M, __m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_round_ss_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_mask_cmp_round_ss_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %__X, <4 x float> %__Y, i32 5, i8 %__M, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_mask_cmp_round_ss_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %__X, <4 x float> %__Y, i32 5, i8 %__M, i32 4) + // X64-NEXT: ret i8 %0 return _mm_mask_cmp_round_ss_mask(__M, __X, __Y, 5, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm_cmp_ss_mask(__m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_cmp_ss_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_cmp_ss_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %__X, <4 x float> %__Y, i32 5, i8 -1, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_cmp_ss_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %__X, <4 x float> %__Y, i32 5, i8 -1, i32 4) + // X64-NEXT: ret i8 %0 return _mm_cmp_ss_mask(__X, __Y, 5); } __mmask8 test_mm_mask_cmp_ss_mask(__mmask8 __M, __m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_ss_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_mask_cmp_ss_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %__X, <4 x float> %__Y, i32 5, i8 %__M, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_mask_cmp_ss_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x 
float> %__X, <4 x float> %__Y, i32 5, i8 %__M, i32 4) + // X64-NEXT: ret i8 %0 return _mm_mask_cmp_ss_mask(__M, __X, __Y, 5); } __mmask8 test_mm_cmp_round_sd_mask(__m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_cmp_round_sd_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_cmp_round_sd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 -1, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_cmp_round_sd_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 -1, i32 4) + // X64-NEXT: ret i8 %0 return _mm_cmp_round_sd_mask(__X, __Y, 5, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm_mask_cmp_round_sd_mask(__mmask8 __M, __m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_round_sd_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_mask_cmp_round_sd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 %__M, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_mask_cmp_round_sd_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 %__M, i32 4) + // X64-NEXT: ret i8 %0 return _mm_mask_cmp_round_sd_mask(__M, __X, __Y, 5, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm_cmp_sd_mask(__m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_cmp_sd_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_cmp_sd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 -1, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_cmp_sd_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 -1, i32 4) + // X64-NEXT: ret i8 %0 return _mm_cmp_sd_mask(__X, __Y, 5); } __mmask8 test_mm_mask_cmp_sd_mask(__mmask8 __M, __m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_sd_mask - // CHECK: @llvm.x86.avx512.mask.cmp + // APPLE-LABEL: test_mm_mask_cmp_sd_mask + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 %__M, i32 4) + // APPLE-NEXT: ret i8 %0 + // X64-LABEL: test_mm_mask_cmp_sd_mask + // X64: entry: + // X64-NEXT: %0 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %__X, <2 x double> %__Y, i32 5, i8 %__M, i32 4) + // X64-NEXT: ret i8 %0 return _mm_mask_cmp_sd_mask(__M, __X, __Y, 5); } __m512 test_mm512_movehdup_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_movehdup_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_movehdup_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_movehdup_ps + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_movehdup_ps(__A); } __m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_movehdup_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: 
test_mm512_mask_movehdup_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_movehdup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_movehdup_ps(__W, __U, __A); } __m512 test_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_movehdup_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_movehdup_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_movehdup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_movehdup_ps(__U, __A); } __m512 test_mm512_moveldup_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_moveldup_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_moveldup_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_moveldup_ps + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_moveldup_ps(__A); } __m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_moveldup_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_moveldup_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_moveldup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_moveldup_ps(__W, __U, __A); } __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_moveldup_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x 
float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_moveldup_ps + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_moveldup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <16 x i32> + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %shuffle.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_moveldup_ps(__U, __A); } __m512i test_mm512_shuffle_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_shuffle_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> + // APPLE-LABEL: test_mm512_shuffle_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %permil = shufflevector <16 x i32> %0, <16 x i32> undef, <16 x i32> + // APPLE-NEXT: %1 = bitcast <16 x i32> %permil to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_shuffle_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %permil = shufflevector <16 x i32> %0, <16 x i32> undef, <16 x i32> + // X64-NEXT: %1 = bitcast <16 x i32> %permil to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_shuffle_epi32(__A, 1); } __m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_shuffle_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_shuffle_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %permil = shufflevector <16 x i32> %0, <16 x i32> undef, <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %permil, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_shuffle_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %permil = shufflevector <16 x i32> %0, <16 x i32> undef, <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %permil, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_shuffle_epi32(__W, __U, __A, 1); } __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_shuffle_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %permil = shufflevector <16 x i32> %0, <16 x i32> undef, <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %permil, <16 x i32> zeroinitializer + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x 
i64> %3 + // X64-LABEL: test_mm512_maskz_shuffle_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %permil = shufflevector <16 x i32> %0, <16 x i32> undef, <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %permil, <16 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_shuffle_epi32(__U, __A, 1); } __m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_expand_pd - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: test_mm512_mask_expand_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %__A, <8 x double> %__W, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_expand_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %__A, <8 x double> %__W, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_expand_pd(__W, __U, __A); } __m512d test_mm512_maskz_expand_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_pd - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: test_mm512_maskz_expand_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %__A, <8 x double> zeroinitializer, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_expand_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %__A, <8 x double> zeroinitializer, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_expand_pd(__U, __A); } __m512i test_mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_expand_epi64 - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: test_mm512_mask_expand_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %__A, <8 x i64> %__W, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_expand_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %__A, <8 x i64> %__W, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_expand_epi64(__W, __U, __A); } __m512i test_mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_epi64 - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: test_mm512_maskz_expand_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %__A, <8 x i64> zeroinitializer, <8 x i1> %0) #12 + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_expand_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %__A, <8 x i64> zeroinitializer, <8 x i1> %0) #12 + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_expand_epi64(__U, 
__A); } __m512i test_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_epi64 - // CHECK: @llvm.masked.expandload.v8i64(i64* %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_expandloadu_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to i64* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> %__W) #12 + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_expandloadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> %__W) #12 + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_expandloadu_epi64(__W, __U, __P); } __m512i test_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi64 - // CHECK: @llvm.masked.expandload.v8i64(i64* %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_expandloadu_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to i64* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> zeroinitializer) #12 + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_expandloadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> zeroinitializer) #12 + // X64-NEXT: ret <8 x i64> %2 return _mm512_maskz_expandloadu_epi64(__U, __P); } __m512d test_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_pd - // CHECK: @llvm.masked.expandload.v8f64(double* %{{.*}}, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_expandloadu_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to double* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> %__W) #12 + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_expandloadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x double> @llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> %__W) #12 + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_expandloadu_pd(__W, __U, __P); } __m512d test_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_pd - // CHECK: @llvm.masked.expandload.v8f64(double* %{{.*}}, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_expandloadu_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to double* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = tail call <8 x double> @llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> zeroinitializer) #12 + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_expandloadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x double> 
@llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> zeroinitializer) #12 + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_expandloadu_pd(__U, __P); } __m512i test_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_epi32 - // CHECK: @llvm.masked.expandload.v16i32(i32* %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_expandloadu_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = bitcast i8* %__P to i32* + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %1, <16 x i1> %2, <16 x i32> %0) #12 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_expandloadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to i32* + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %1, <16 x i1> %2, <16 x i32> %0) #12 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_expandloadu_epi32(__W, __U, __P); } __m512i test_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi32 - // CHECK: @llvm.masked.expandload.v16i32(i32* %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) + // APPLE-LABEL: test_mm512_maskz_expandloadu_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to i32* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %0, <16 x i1> %1, <16 x i32> zeroinitializer) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_expandloadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i32* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %0, <16 x i1> %1, <16 x i32> zeroinitializer) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_expandloadu_epi32(__U, __P); } __m512 test_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_ps - // CHECK: @llvm.masked.expandload.v16f32(float* %{{.*}}, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_expandloadu_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to float* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> %__W) #12 + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_expandloadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> %__W) #12 + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_expandloadu_ps(__W, __U, __P); } __m512 test_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_ps - // CHECK: @llvm.masked.expandload.v16f32(float* %{{.*}}, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) + // APPLE-LABEL: 
test_mm512_maskz_expandloadu_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to float* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> zeroinitializer) #12 + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_expandloadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> zeroinitializer) #12 + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_expandloadu_ps(__U, __P); } __m512 test_mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_expand_ps - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: test_mm512_mask_expand_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %__A, <16 x float> %__W, <16 x i1> %0) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_expand_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %__A, <16 x float> %__W, <16 x i1> %0) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_expand_ps(__W, __U, __A); } __m512 test_mm512_maskz_expand_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_ps - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: test_mm512_maskz_expand_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %__A, <16 x float> zeroinitializer, <16 x i1> %0) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_expand_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %__A, <16 x float> zeroinitializer, <16 x i1> %0) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_expand_ps(__U, __A); } __m512i test_mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_expand_epi32 - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: test_mm512_mask_expand_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i1> %2) #12 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_expand_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = tail call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i1> %2) #12 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_expand_epi32(__W, __U, __A); } __m512i test_mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_epi32 - // CHECK: @llvm.x86.avx512.mask.expand + // APPLE-LABEL: 
test_mm512_maskz_expand_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %0, <16 x i32> zeroinitializer, <16 x i1> %1) #12 + // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_expand_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = tail call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %0, <16 x i32> zeroinitializer, <16 x i1> %1) #12 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_expand_epi32(__U, __A); } __m512d test_mm512_cvt_roundps_pd(__m256 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_pd - // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + // APPLE-LABEL: test_mm512_cvt_roundps_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %__A, <8 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_cvt_roundps_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %__A, <8 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_cvt_roundps_pd(__A, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_mask_cvt_roundps_pd(__m512d __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundps_pd - // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + // APPLE-LABEL: test_mm512_mask_cvt_roundps_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %__A, <8 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_cvt_roundps_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %__A, <8 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_cvt_roundps_pd(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_maskz_cvt_roundps_pd(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_pd - // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + // APPLE-LABEL: test_mm512_maskz_cvt_roundps_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %__A, <8 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_cvt_roundps_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %__A, <8 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_cvt_roundps_pd(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_cvtps_pd(__m256 __A) { - // CHECK-LABEL: @test_mm512_cvtps_pd - // CHECK: fpext <8 x float> %{{.*}} to <8 x double> + // APPLE-LABEL: test_mm512_cvtps_pd + // APPLE: entry: + // APPLE-NEXT: %conv.i = fpext <8 x float> %__A to <8 x double> + // APPLE-NEXT: ret <8 x double> %conv.i + // X64-LABEL: test_mm512_cvtps_pd + // X64: entry: + // X64-NEXT: %conv.i = fpext <8 x float> %__A to <8 x double> + // X64-NEXT: ret <8 x double> %conv.i return _mm512_cvtps_pd(__A); } __m512d test_mm512_cvtpslo_pd(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvtpslo_pd - // CHECK: 
shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> - // CHECK: fpext <8 x float> %{{.*}} to <8 x double> + // APPLE-LABEL: test_mm512_cvtpslo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> + // APPLE-NEXT: %conv.i.i = fpext <8 x float> %shuffle.i.i to <8 x double> + // APPLE-NEXT: ret <8 x double> %conv.i.i + // X64-LABEL: test_mm512_cvtpslo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> + // X64-NEXT: %conv.i.i = fpext <8 x float> %shuffle.i.i to <8 x double> + // X64-NEXT: ret <8 x double> %conv.i.i return _mm512_cvtpslo_pd(__A); } __m512d test_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtps_pd - // CHECK: fpext <8 x float> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtps_pd + // APPLE: entry: + // APPLE-NEXT: %conv.i.i = fpext <8 x float> %__A to <8 x double> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %conv.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_cvtps_pd + // X64: entry: + // X64-NEXT: %conv.i.i = fpext <8 x float> %__A to <8 x double> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %conv.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_cvtps_pd(__W, __U, __A); } __m512d test_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpslo_pd - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> - // CHECK: fpext <8 x float> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtpslo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> + // APPLE-NEXT: %conv.i.i.i = fpext <8 x float> %shuffle.i.i to <8 x double> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %conv.i.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_cvtpslo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> + // X64-NEXT: %conv.i.i.i = fpext <8 x float> %shuffle.i.i to <8 x double> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %conv.i.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_cvtpslo_pd(__W, __U, __A); } __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_pd - // CHECK: fpext <8 x float> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtps_pd + // APPLE: entry: + // APPLE-NEXT: %conv.i.i = fpext <8 x float> %__A to <8 x double> + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %conv.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_cvtps_pd + // X64: entry: + // X64-NEXT: %conv.i.i = fpext <8 x float> %__A to <8 x double> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> 
%conv.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_cvtps_pd(__U, __A); } __m512d test_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_mov_pd - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mov_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %__A, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_mov_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %__A, <8 x double> %__W + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_mov_pd(__W, __U, __A); } __m512d test_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_mov_pd - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mov_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x double> %__A, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_maskz_mov_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x double> %__A, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %1 return _mm512_maskz_mov_pd(__U, __A); } __m512 test_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_mov_ps - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_mov_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %__A, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_mov_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %__A, <16 x float> %__W + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_mov_ps(__W, __U, __A); } __m512 test_mm512_maskz_mov_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_mov_ps - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_mov_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x float> %__A, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_mov_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x float> %__A, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_mov_ps(__U, __A); } void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd - // CHECK: @llvm.masked.compressstore.v8f64(<8 x double> %{{.*}}, double* %{{.*}}, <8 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_compressstoreu_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to double* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: tail call void @llvm.masked.compressstore.v8f64(<8 x double> %__A, double* %0, <8 x i1> %1) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_compressstoreu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = 
bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.compressstore.v8f64(<8 x double> %__A, double* %0, <8 x i1> %1) #12 + // X64-NEXT: ret void return _mm512_mask_compressstoreu_pd(__P, __U, __A); } void test_mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi64 - // CHECK: @llvm.masked.compressstore.v8i64(<8 x i64> %{{.*}}, i64* %{{.*}}, <8 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_compressstoreu_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to i64* + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %__A, i64* %0, <8 x i1> %1) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_compressstoreu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %__A, i64* %0, <8 x i1> %1) #12 + // X64-NEXT: ret void return _mm512_mask_compressstoreu_epi64(__P, __U, __A); } void test_mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_ps - // CHECK: @llvm.masked.compressstore.v16f32(<16 x float> %{{.*}}, float* %{{.*}}, <16 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_compressstoreu_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast i8* %__P to float* + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: tail call void @llvm.masked.compressstore.v16f32(<16 x float> %__A, float* %0, <16 x i1> %1) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_compressstoreu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: tail call void @llvm.masked.compressstore.v16f32(<16 x float> %__A, float* %0, <16 x i1> %1) #12 + // X64-NEXT: ret void return _mm512_mask_compressstoreu_ps(__P, __U, __A); } void test_mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi32 - // CHECK: @llvm.masked.compressstore.v16i32(<16 x i32> %{{.*}}, i32* %{{.*}}, <16 x i1> %{{.*}}) + // APPLE-LABEL: test_mm512_mask_compressstoreu_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast i8* %__P to i32* + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %0, i32* %1, <16 x i1> %2) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm512_mask_compressstoreu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to i32* + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %0, i32* %1, <16 x i1> %2) #12 + // X64-NEXT: ret void return _mm512_mask_compressstoreu_epi32(__P, __U, __A); } __m256i test_mm512_cvtt_roundpd_epu32(__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtt_roundpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + // APPLE-LABEL: test_mm512_cvtt_roundpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvtt_roundpd_epu32 + // X64: entry: + // X64-NEXT: %0 = 
tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvtt_roundpd_epu32(__A, _MM_FROUND_CUR_DIRECTION); } __m256i test_mm512_mask_cvtt_roundpd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + // APPLE-LABEL: test_mm512_mask_cvtt_roundpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtt_roundpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvtt_roundpd_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m256i test_mm512_maskz_cvtt_roundpd_epu32(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvtt_roundpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtt_roundpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvtt_roundpd_epu32(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m256i test_mm512_cvttpd_epu32(__m512d __A) { - // CHECK-LABEL: @test_mm512_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + // APPLE-LABEL: test_mm512_cvttpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvttpd_epu32(__A); } __m256i test_mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + // APPLE-LABEL: test_mm512_mask_cvttpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x 
double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvttpd_epu32(__W, __U, __A); } __m256i test_mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvttpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvttpd_epu32(__U, __A); } __m512 test_mm512_castpd_ps (__m512d __A) { - // CHECK-LABEL: @test_mm512_castpd_ps - // CHECK: bitcast <8 x double> %{{.}} to <16 x float> + // APPLE-LABEL: test_mm512_castpd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x double> %__A to <16 x float> + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_castpd_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x double> %__A to <16 x float> + // X64-NEXT: ret <16 x float> %0 return _mm512_castpd_ps (__A); } __m512d test_mm512_castps_pd (__m512 __A) { - // CHECK-LABEL: @test_mm512_castps_pd - // CHECK: bitcast <16 x float> %{{.}} to <8 x double> + // APPLE-LABEL: test_mm512_castps_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <16 x float> %__A to <8 x double> + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_castps_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <16 x float> %__A to <8 x double> + // X64-NEXT: ret <8 x double> %0 return _mm512_castps_pd (__A); } __m512i test_mm512_castpd_si512 (__m512d __A) { - // CHECK-LABEL: @test_mm512_castpd_si512 - // CHECK: bitcast <8 x double> %{{.}} to <8 x i64> + // APPLE-LABEL: test_mm512_castpd_si512 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x double> %__A to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_castpd_si512 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x double> %__A to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 return _mm512_castpd_si512 (__A); } __m512 test_mm512_castps128_ps512(__m128 __A) { - // CHECK-LABEL: @test_mm512_castps128_ps512 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_castps128_ps512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_castps128_ps512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_castps128_ps512(__A); } __m512d test_mm512_castpd128_pd512(__m128d __A) { - // CHECK-LABEL: @test_mm512_castpd128_pd512 - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_castpd128_pd512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_castpd128_pd512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <2 x double> %__A, <2 x 
double> undef, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_castpd128_pd512(__A); } __m512i test_mm512_set1_epi8(char d) { - // CHECK-LABEL: @test_mm512_set1_epi8 - // CHECK: insertelement <64 x i8> {{.*}}, i32 0 - // CHECK: insertelement <64 x i8> {{.*}}, i32 1 - // CHECK: insertelement <64 x i8> {{.*}}, i32 2 - // CHECK: insertelement <64 x i8> {{.*}}, i32 3 - // CHECK: insertelement <64 x i8> {{.*}}, i32 4 - // CHECK: insertelement <64 x i8> {{.*}}, i32 5 - // CHECK: insertelement <64 x i8> {{.*}}, i32 6 - // CHECK: insertelement <64 x i8> {{.*}}, i32 7 - // CHECK: insertelement <64 x i8> {{.*}}, i32 63 + // APPLE-LABEL: test_mm512_set1_epi8 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <64 x i8> undef, i8 %d, i32 0 + // APPLE-NEXT: %vecinit63.i = shufflevector <64 x i8> %vecinit.i, <64 x i8> undef, <64 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast <64 x i8> %vecinit63.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_set1_epi8 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <64 x i8> undef, i8 %d, i32 0 + // X64-NEXT: %vecinit63.i = shufflevector <64 x i8> %vecinit.i, <64 x i8> undef, <64 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast <64 x i8> %vecinit63.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 return _mm512_set1_epi8(d); } __m512i test_mm512_set1_epi16(short d) { - // CHECK-LABEL: @test_mm512_set1_epi16 - // CHECK: insertelement <32 x i16> {{.*}}, i32 0 - // CHECK: insertelement <32 x i16> {{.*}}, i32 1 - // CHECK: insertelement <32 x i16> {{.*}}, i32 2 - // CHECK: insertelement <32 x i16> {{.*}}, i32 3 - // CHECK: insertelement <32 x i16> {{.*}}, i32 4 - // CHECK: insertelement <32 x i16> {{.*}}, i32 5 - // CHECK: insertelement <32 x i16> {{.*}}, i32 6 - // CHECK: insertelement <32 x i16> {{.*}}, i32 7 - // CHECK: insertelement <32 x i16> {{.*}}, i32 31 + // APPLE-LABEL: test_mm512_set1_epi16 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <32 x i16> undef, i16 %d, i32 0 + // APPLE-NEXT: %vecinit31.i = shufflevector <32 x i16> %vecinit.i, <32 x i16> undef, <32 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast <32 x i16> %vecinit31.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_set1_epi16 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <32 x i16> undef, i16 %d, i32 0 + // X64-NEXT: %vecinit31.i = shufflevector <32 x i16> %vecinit.i, <32 x i16> undef, <32 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast <32 x i16> %vecinit31.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 return _mm512_set1_epi16(d); } __m512i test_mm512_set4_epi32 (int __A, int __B, int __C, int __D) { - // CHECK-LABEL: @test_mm512_set4_epi32 - // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + // APPLE-LABEL: test_mm512_set4_epi32 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %__D, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %__C, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %__B, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %__A, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %__D, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %__C, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %__B, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %__A, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x i32> 
%vecinit7.i, i32 %__D, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %__C, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %__B, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %__A, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %__D, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %__C, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %__B, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %__A, i32 15 + // APPLE-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_set4_epi32 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %__D, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %__C, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %__B, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %__A, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %__D, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %__C, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %__B, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %__A, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %__D, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %__C, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %__B, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %__A, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %__D, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %__C, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %__B, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %__A, i32 15 + // X64-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 return _mm512_set4_epi32 (__A,__B,__C,__D); } __m512i test_mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) { - // CHECK-LABEL: @test_mm512_set4_epi64 - // CHECK: insertelement <8 x i64> {{.*}}, i32 7 + // APPLE-LABEL: test_mm512_set4_epi64 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %__D, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %__C, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %__B, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %__A, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %__D, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %__C, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %__B, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %__A, i32 7 + // APPLE-NEXT: ret <8 x i64> %vecinit7.i + // X64-LABEL: test_mm512_set4_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %__D, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %__C, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %__B, i32 2 + // X64-NEXT: %vecinit3.i = 
insertelement <8 x i64> %vecinit2.i, i64 %__A, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %__D, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %__C, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %__B, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %__A, i32 7 + // X64-NEXT: ret <8 x i64> %vecinit7.i return _mm512_set4_epi64 (__A,__B,__C,__D); } __m512d test_mm512_set4_pd (double __A, double __B, double __C, double __D) { - // CHECK-LABEL: @test_mm512_set4_pd - // CHECK: insertelement <8 x double> {{.*}}, i32 7 + // APPLE-LABEL: test_mm512_set4_pd + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x double> undef, double %__D, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %__C, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %__B, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %__A, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %__D, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %__C, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %__B, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %__A, i32 7 + // APPLE-NEXT: ret <8 x double> %vecinit7.i + // X64-LABEL: test_mm512_set4_pd + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x double> undef, double %__D, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %__C, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %__B, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %__A, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %__D, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %__C, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %__B, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %__A, i32 7 + // X64-NEXT: ret <8 x double> %vecinit7.i return _mm512_set4_pd (__A,__B,__C,__D); } __m512 test_mm512_set4_ps (float __A, float __B, float __C, float __D) { - // CHECK-LABEL: @test_mm512_set4_ps - // CHECK: insertelement <16 x float> {{.*}}, i32 15 + // APPLE-LABEL: test_mm512_set4_ps + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x float> undef, float %__D, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x float> %vecinit.i, float %__C, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %__B, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %__A, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %__D, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %__C, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %__B, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %__A, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %__D, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %__C, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %__B, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, 
float %__A, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %__D, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %__C, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %__B, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %__A, i32 15 + // APPLE-NEXT: ret <16 x float> %vecinit15.i + // X64-LABEL: test_mm512_set4_ps + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x float> undef, float %__D, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x float> %vecinit.i, float %__C, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %__B, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %__A, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %__D, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %__C, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %__B, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %__A, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %__D, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %__C, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %__B, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %__A, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %__D, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %__C, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %__B, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %__A, i32 15 + // X64-NEXT: ret <16 x float> %vecinit15.i return _mm512_set4_ps (__A,__B,__C,__D); } __m512i test_mm512_setr4_epi32(int e0, int e1, int e2, int e3) { - // CHECK-LABEL: @test_mm512_setr4_epi32 - // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + // APPLE-LABEL: test_mm512_setr4_epi32 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %e0, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %e1, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %e2, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %e3, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %e0, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %e1, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %e2, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %e3, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %e0, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %e1, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %e2, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %e3, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %e0, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %e1, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %e2, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %e3, i32 15 + // 
APPLE-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_setr4_epi32 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %e0, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %e1, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %e2, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %e3, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %e0, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %e1, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %e2, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %e3, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %e0, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %e1, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %e2, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %e3, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %e0, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %e1, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %e2, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %e3, i32 15 + // X64-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 return _mm512_setr4_epi32(e0, e1, e2, e3); } __m512i test_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) { - // CHECK-LABEL: @test_mm512_setr4_epi64 - // CHECK: insertelement <8 x i64> {{.*}}, i32 7 - return _mm512_setr4_epi64(e0, e1, e2, e3); + // APPLE-LABEL: test_mm512_setr4_epi64 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %e0, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %e1, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %e2, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %e3, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %e0, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %e1, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %e2, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %e3, i32 7 + // APPLE-NEXT: ret <8 x i64> %vecinit7.i + // X64-LABEL: test_mm512_setr4_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %e0, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %e1, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %e2, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %e3, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %e0, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %e1, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %e2, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %e3, i32 7 + // X64-NEXT: ret <8 x i64> %vecinit7.i + return _mm512_setr4_epi64(e0, e1, e2, e3); } __m512d test_mm512_setr4_pd(double e0, double e1, double e2, double e3) { - // CHECK-LABEL: @test_mm512_setr4_pd - // CHECK: insertelement <8 x double> {{.*}}, i32 7 + // 
APPLE-LABEL: test_mm512_setr4_pd + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x double> undef, double %e0, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %e1, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %e2, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %e3, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %e0, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %e1, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %e2, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %e3, i32 7 + // APPLE-NEXT: ret <8 x double> %vecinit7.i + // X64-LABEL: test_mm512_setr4_pd + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x double> undef, double %e0, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %e1, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %e2, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %e3, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %e0, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %e1, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %e2, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %e3, i32 7 + // X64-NEXT: ret <8 x double> %vecinit7.i return _mm512_setr4_pd(e0,e1,e2,e3); } __m512 test_mm512_setr4_ps(float e0, float e1, float e2, float e3) { - // CHECK-LABEL: @test_mm512_setr4_ps - // CHECK: insertelement <16 x float> {{.*}}, i32 15 - return _mm512_setr4_ps(e0,e1,e2,e3); + // APPLE-LABEL: test_mm512_setr4_ps + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x float> undef, float %e0, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x float> %vecinit.i, float %e1, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %e2, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %e3, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %e0, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %e1, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %e2, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %e3, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %e0, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %e1, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %e2, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %e3, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %e0, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %e1, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %e2, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %e3, i32 15 + // APPLE-NEXT: ret <16 x float> %vecinit15.i + // X64-LABEL: test_mm512_setr4_ps + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x float> undef, float %e0, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x float> %vecinit.i, float 
%e1, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %e2, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %e3, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %e0, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %e1, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %e2, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %e3, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %e0, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %e1, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %e2, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %e3, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %e0, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %e1, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %e2, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %e3, i32 15 + // X64-NEXT: ret <16 x float> %vecinit15.i + return _mm512_setr4_ps(e0, e1, e2, e3); } __m512d test_mm512_castpd256_pd512(__m256d a) { - // CHECK-LABEL: @test_mm512_castpd256_pd512 - // CHECK: shufflevector <4 x double> {{.*}} + // APPLE-LABEL: test_mm512_castpd256_pd512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_castpd256_pd512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_castpd256_pd512(a); } __m256d test_mm512_castpd512_pd256 (__m512d __A) { - // CHECK-LABEL: @test_mm512_castpd512_pd256 - // CHECK: shufflevector <8 x double> %{{.}}, <8 x double> %{{.}}, <4 x i32> + // APPLE-LABEL: test_mm512_castpd512_pd256 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> + // APPLE-NEXT: ret <4 x double> %shuffle.i + // X64-LABEL: test_mm512_castpd512_pd256 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> + // X64-NEXT: ret <4 x double> %shuffle.i return _mm512_castpd512_pd256 (__A); } __m256 test_mm512_castps512_ps256 (__m512 __A) { - // CHECK-LABEL: @test_mm512_castps512_ps256 - // CHECK: shufflevector <16 x float> %{{.}}, <16 x float> %{{.}}, <8 x i32> + // APPLE-LABEL: test_mm512_castps512_ps256 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> + // APPLE-NEXT: ret <8 x float> %shuffle.i + // X64-LABEL: test_mm512_castps512_ps256 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> + // X64-NEXT: ret <8 x float> %shuffle.i return _mm512_castps512_ps256 (__A); } __m512i test_mm512_castps_si512 (__m512 __A) { - // CHECK-LABEL: @test_mm512_castps_si512 - // CHECK: bitcast <16 x float> %{{.}} to <8 x i64> + // APPLE-LABEL: test_mm512_castps_si512 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <16 x float> %__A to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_castps_si512 + // X64: entry: + // X64-NEXT: %0 = bitcast <16 x float> %__A to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 
return _mm512_castps_si512 (__A); } __m512i test_mm512_castsi128_si512(__m128i __A) { - // CHECK-LABEL: @test_mm512_castsi128_si512 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_castsi128_si512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %shuffle.i + // X64-LABEL: test_mm512_castsi128_si512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_castsi128_si512(__A); } __m512i test_mm512_castsi256_si512(__m256i __A) { - // CHECK-LABEL: @test_mm512_castsi256_si512 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_castsi256_si512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x i64> %__A, <4 x i64> undef, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %shuffle.i + // X64-LABEL: test_mm512_castsi256_si512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x i64> %__A, <4 x i64> undef, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_castsi256_si512(__A); } __m512 test_mm512_castsi512_ps (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_ps - // CHECK: bitcast <8 x i64> %{{.}} to <16 x float> + // APPLE-LABEL: test_mm512_castsi512_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x float> + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_castsi512_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x float> + // X64-NEXT: ret <16 x float> %0 return _mm512_castsi512_ps (__A); } __m512d test_mm512_castsi512_pd (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_pd - // CHECK: bitcast <8 x i64> %{{.}} to <8 x double> + // APPLE-LABEL: test_mm512_castsi512_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <8 x double> + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_castsi512_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <8 x double> + // X64-NEXT: ret <8 x double> %0 return _mm512_castsi512_pd (__A); } __m128i test_mm512_castsi512_si128 (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_si128 - // CHECK: shufflevector <8 x i64> %{{.}}, <8 x i64> %{{.}}, <2 x i32> + // APPLE-LABEL: test_mm512_castsi512_si128 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <2 x i32> + // APPLE-NEXT: ret <2 x i64> %shuffle.i + // X64-LABEL: test_mm512_castsi512_si128 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <2 x i32> + // X64-NEXT: ret <2 x i64> %shuffle.i return _mm512_castsi512_si128 (__A); } __m256i test_mm512_castsi512_si256 (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_si256 - // CHECK: shufflevector <8 x i64> %{{.}}, <8 x i64> %{{.}}, <4 x i32> + // APPLE-LABEL: test_mm512_castsi512_si256 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: ret <4 x i64> %shuffle.i + // X64-LABEL: test_mm512_castsi512_si256 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: ret <4 x i64> %shuffle.i return _mm512_castsi512_si256 (__A); } __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_cvt_roundsd_ss - // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round + // APPLE-LABEL: test_mm_cvt_roundsd_ss + // 
APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_cvt_roundsd_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_cvt_roundsd_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_mask_cvt_roundsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_cvt_roundsd_ss - // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round + // APPLE-LABEL: test_mm_mask_cvt_roundsd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_cvt_roundsd_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_cvt_roundsd_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_maskz_cvt_roundsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_cvt_roundsd_ss - // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round + // APPLE-LABEL: test_mm_maskz_cvt_roundsd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_cvt_roundsd_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_cvt_roundsd_ss(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ __m128d test_mm_cvt_roundi64_sd(__m128d __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundi64_sd - // CHECK: @llvm.x86.avx512.cvtsi2sd64 + // APPLE-LABEL: test_mm_cvt_roundi64_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %__A, i64 %__B, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_cvt_roundi64_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %__A, i64 %__B, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_cvt_roundi64_sd(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_cvt_roundsi64_sd(__m128d __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundsi64_sd - // CHECK: @llvm.x86.avx512.cvtsi2sd64 + // APPLE-LABEL: test_mm_cvt_roundsi64_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %__A, i64 %__B, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_cvt_roundsi64_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %__A, i64 %__B, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_cvt_roundsi64_sd(__A, __B, _MM_FROUND_CUR_DIRECTION); } #endif __m128 test_mm_cvt_roundsi32_ss(__m128 __A, int __B) { - // CHECK-LABEL: @test_mm_cvt_roundsi32_ss - // CHECK: @llvm.x86.avx512.cvtsi2ss32 + // APPLE-LABEL: test_mm_cvt_roundsi32_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail 
call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %__A, i32 %__B, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_cvt_roundsi32_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %__A, i32 %__B, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_cvt_roundsi32_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_cvt_roundi32_ss(__m128 __A, int __B) { - // CHECK-LABEL: @test_mm_cvt_roundi32_ss - // CHECK: @llvm.x86.avx512.cvtsi2ss32 + // APPLE-LABEL: test_mm_cvt_roundi32_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %__A, i32 %__B, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_cvt_roundi32_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %__A, i32 %__B, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_cvt_roundi32_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ __m128 test_mm_cvt_roundsi64_ss(__m128 __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundsi64_ss - // CHECK: @llvm.x86.avx512.cvtsi2ss64 + // APPLE-LABEL: test_mm_cvt_roundsi64_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %__A, i64 %__B, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_cvt_roundsi64_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %__A, i64 %__B, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_cvt_roundsi64_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_cvt_roundi64_ss(__m128 __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundi64_ss - // CHECK: @llvm.x86.avx512.cvtsi2ss64 + // APPLE-LABEL: test_mm_cvt_roundi64_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %__A, i64 %__B, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_cvt_roundi64_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %__A, i64 %__B, i32 4) + // X64-NEXT: ret <4 x float> %0 return _mm_cvt_roundi64_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); } #endif __m128d test_mm_cvt_roundss_sd(__m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_cvt_roundss_sd - // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round + // APPLE-LABEL: test_mm_cvt_roundss_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_cvt_roundss_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_cvt_roundss_sd(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_mask_cvt_roundss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_cvt_roundss_sd - // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round + // APPLE-LABEL: test_mm_mask_cvt_roundss_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_cvt_roundss_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> 
%__A, <4 x float> %__B, <2 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_cvt_roundss_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_maskz_cvt_roundss_sd( __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_cvt_roundss_sd - // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round + // APPLE-LABEL: test_mm_maskz_cvt_roundss_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_cvt_roundss_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_cvt_roundss_sd( __U, __A, __B, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_cvtu32_sd(__m128d __A, unsigned __B) { - // CHECK-LABEL: @test_mm_cvtu32_sd - // CHECK: uitofp i32 %{{.*}} to double - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvtu32_sd + // APPLE: entry: + // APPLE-NEXT: %conv.i = uitofp i32 %__B to double + // APPLE-NEXT: %vecins.i = insertelement <2 x double> %__A, double %conv.i, i32 0 + // APPLE-NEXT: ret <2 x double> %vecins.i + // X64-LABEL: test_mm_cvtu32_sd + // X64: entry: + // X64-NEXT: %conv.i = uitofp i32 %__B to double + // X64-NEXT: %vecins.i = insertelement <2 x double> %__A, double %conv.i, i32 0 + // X64-NEXT: ret <2 x double> %vecins.i return _mm_cvtu32_sd(__A, __B); } #ifdef __x86_64__ __m128d test_mm_cvt_roundu64_sd(__m128d __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundu64_sd - // CHECK: @llvm.x86.avx512.cvtusi642sd + // APPLE-LABEL: test_mm_cvt_roundu64_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %__A, i64 %__B, i32 4) + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_cvt_roundu64_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %__A, i64 %__B, i32 4) + // X64-NEXT: ret <2 x double> %0 return _mm_cvt_roundu64_sd(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128d test_mm_cvtu64_sd(__m128d __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvtu64_sd - // CHECK: uitofp i64 %{{.*}} to double - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvtu64_sd + // APPLE: entry: + // APPLE-NEXT: %conv.i = uitofp i64 %__B to double + // APPLE-NEXT: %vecins.i = insertelement <2 x double> %__A, double %conv.i, i32 0 + // APPLE-NEXT: ret <2 x double> %vecins.i + // X64-LABEL: test_mm_cvtu64_sd + // X64: entry: + // X64-NEXT: %conv.i = uitofp i64 %__B to double + // X64-NEXT: %vecins.i = insertelement <2 x double> %__A, double %conv.i, i32 0 + // X64-NEXT: ret <2 x double> %vecins.i return _mm_cvtu64_sd(__A, __B); } #endif __m128 test_mm_cvt_roundu32_ss(__m128 __A, unsigned __B) { - // CHECK-LABEL: @test_mm_cvt_roundu32_ss - // CHECK: @llvm.x86.avx512.cvtusi2ss + // APPLE-LABEL: test_mm_cvt_roundu32_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %__A, i32 %__B, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_cvt_roundu32_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %__A, i32 %__B, i32 4) + // X64-NEXT: ret <4 x float> 
%0 return _mm_cvt_roundu32_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_cvtu32_ss(__m128 __A, unsigned __B) { - // CHECK-LABEL: @test_mm_cvtu32_ss - // CHECK: uitofp i32 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvtu32_ss + // APPLE: entry: + // APPLE-NEXT: %conv.i = uitofp i32 %__B to float + // APPLE-NEXT: %vecins.i = insertelement <4 x float> %__A, float %conv.i, i32 0 + // APPLE-NEXT: ret <4 x float> %vecins.i + // X64-LABEL: test_mm_cvtu32_ss + // X64: entry: + // X64-NEXT: %conv.i = uitofp i32 %__B to float + // X64-NEXT: %vecins.i = insertelement <4 x float> %__A, float %conv.i, i32 0 + // X64-NEXT: ret <4 x float> %vecins.i return _mm_cvtu32_ss(__A, __B); } #ifdef __x86_64__ __m128 test_mm_cvt_roundu64_ss(__m128 __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundu64_ss - // CHECK: @llvm.x86.avx512.cvtusi642ss - return _mm_cvt_roundu64_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); + // APPLE-LABEL: test_mm_cvt_roundu64_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %__A, i64 %__B, i32 4) + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_cvt_roundu64_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %__A, i64 %__B, i32 4) + // X64-NEXT: ret <4 x float> %0 + return _mm_cvt_roundu64_ss(__A, __B, _MM_FROUND_CUR_DIRECTION); } __m128 test_mm_cvtu64_ss(__m128 __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvtu64_ss - // CHECK: uitofp i64 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvtu64_ss + // APPLE: entry: + // APPLE-NEXT: %conv.i = uitofp i64 %__B to float + // APPLE-NEXT: %vecins.i = insertelement <4 x float> %__A, float %conv.i, i32 0 + // APPLE-NEXT: ret <4 x float> %vecins.i + // X64-LABEL: test_mm_cvtu64_ss + // X64: entry: + // X64-NEXT: %conv.i = uitofp i64 %__B to float + // X64-NEXT: %vecins.i = insertelement <4 x float> %__A, float %conv.i, i32 0 + // X64-NEXT: ret <4 x float> %vecins.i return _mm_cvtu64_ss(__A, __B); } #endif __m512i test_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + // APPLE-LABEL: test_mm512_mask_cvttps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvttps_epu32 (__W,__U,__A); } __m512i test_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvttps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + 
// X64-LABEL: test_mm512_maskz_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_cvttps_epu32 (__U,__A); } __m512 test_mm512_cvtepu32_ps (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepu32_ps - // CHECK: uitofp <16 x i32> %{{.*}} to <16 x float> + // APPLE-LABEL: test_mm512_cvtepu32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %conv.i = uitofp <16 x i32> %0 to <16 x float> + // APPLE-NEXT: ret <16 x float> %conv.i + // X64-LABEL: test_mm512_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %conv.i = uitofp <16 x i32> %0 to <16 x float> + // X64-NEXT: ret <16 x float> %conv.i return _mm512_cvtepu32_ps (__A); } __m512 test_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu32_ps - // CHECK: uitofp <16 x i32> %{{.*}} to <16 x float> - // CHECK: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %conv.i.i = uitofp <16 x i32> %0 to <16 x float> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %conv.i.i = uitofp <16 x i32> %0 to <16 x float> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_cvtepu32_ps (__W,__U,__A); } __m512 test_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu32_ps - // CHECK: uitofp <16 x i32> %{{.*}} to <16 x float> - // CHECK: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepu32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %conv.i.i = uitofp <16 x i32> %0 to <16 x float> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %conv.i.i = uitofp <16 x i32> %0 to <16 x float> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_cvtepu32_ps (__U,__A); } __m512d test_mm512_cvtepi32_pd (__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_pd - // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> + // APPLE-LABEL: test_mm512_cvtepi32_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // APPLE-NEXT: %conv.i = sitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: ret <8 x double> %conv.i + // X64-LABEL: test_mm512_cvtepi32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i = sitofp <8 x i32> %0 to <8 x double> + // 
X64-NEXT: ret <8 x double> %conv.i return _mm512_cvtepi32_pd (__A); } __m512d test_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_pd - // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi32_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // APPLE-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_cvtepi32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_cvtepi32_pd (__W,__U,__A); } __m512d test_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_pd - // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi32_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // APPLE-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_cvtepi32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_cvtepi32_pd (__U,__A); } __m512d test_mm512_cvtepi32lo_pd (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32lo_pd - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> - // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> + // APPLE-LABEL: test_mm512_cvtepi32lo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // APPLE-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: ret <8 x double> %conv.i.i + // X64-LABEL: test_mm512_cvtepi32lo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // X64-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: ret <8 x double> %conv.i.i return _mm512_cvtepi32lo_pd (__A); } __m512d test_mm512_mask_cvtepi32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32lo_pd - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> - // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi32lo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // 
APPLE-NEXT: %conv.i.i.i = sitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_cvtepi32lo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // X64-NEXT: %conv.i.i.i = sitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_cvtepi32lo_pd (__W, __U, __A); } __m512 test_mm512_cvtepi32_ps (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_ps - // CHECK: sitofp <16 x i32> %{{.*}} to <16 x float> + // APPLE-LABEL: test_mm512_cvtepi32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %conv.i = sitofp <16 x i32> %0 to <16 x float> + // APPLE-NEXT: ret <16 x float> %conv.i + // X64-LABEL: test_mm512_cvtepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %conv.i = sitofp <16 x i32> %0 to <16 x float> + // X64-NEXT: ret <16 x float> %conv.i return _mm512_cvtepi32_ps (__A); } __m512 test_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_ps - // CHECK: sitofp <16 x i32> %{{.*}} to <16 x float> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepi32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %conv.i.i = sitofp <16 x i32> %0 to <16 x float> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_cvtepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %conv.i.i = sitofp <16 x i32> %0 to <16 x float> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_cvtepi32_ps (__W,__U,__A); } __m512 test_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_ps - // CHECK: sitofp <16 x i32> %{{.*}} to <16 x float> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepi32_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %conv.i.i = sitofp <16 x i32> %0 to <16 x float> + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_cvtepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %conv.i.i = sitofp <16 x i32> %0 to <16 x float> + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %conv.i.i, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_cvtepi32_ps (__U,__A); } __m512d test_mm512_cvtepu32_pd(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepu32_pd - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> + // 
APPLE-LABEL: test_mm512_cvtepu32_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // APPLE-NEXT: %conv.i = uitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: ret <8 x double> %conv.i + // X64-LABEL: test_mm512_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i = uitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: ret <8 x double> %conv.i return _mm512_cvtepu32_pd(__A); } __m512d test_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu32_pd - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu32_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // APPLE-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_cvtepu32_pd (__W,__U,__A); } __m512d test_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu32_pd - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_maskz_cvtepu32_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // APPLE-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_cvtepu32_pd (__U,__A); } __m512d test_mm512_cvtepu32lo_pd (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepu32lo_pd - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> + // APPLE-LABEL: test_mm512_cvtepu32lo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // APPLE-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: ret <8 x double> %conv.i.i + // X64-LABEL: test_mm512_cvtepu32lo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // X64-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: ret <8 x double> %conv.i.i return _mm512_cvtepu32lo_pd (__A); } __m512d test_mm512_mask_cvtepu32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu32lo_pd - // CHECK: shufflevector <8 x i64> 
%{{.*}}, <8 x i64> %{{.*}}, <4 x i32> - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> - // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} + // APPLE-LABEL: test_mm512_mask_cvtepu32lo_pd + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // APPLE-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // APPLE-NEXT: %conv.i.i.i = uitofp <8 x i32> %0 to <8 x double> + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i.i, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_cvtepu32lo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i64> %__A, <8 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast <4 x i64> %shuffle.i.i to <8 x i32> + // X64-NEXT: %conv.i.i.i = uitofp <8 x i32> %0 to <8 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %conv.i.i.i, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_cvtepu32lo_pd (__W, __U, __A); } __m256 test_mm512_cvtpd_ps (__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + // APPLE-LABEL: test_mm512_cvtpd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: ret <8 x float> %0 + // X64-LABEL: test_mm512_cvtpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: ret <8 x float> %0 return _mm512_cvtpd_ps (__A); } __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + // APPLE-LABEL: test_mm512_mask_cvtpd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <8 x float> %0 + // X64-LABEL: test_mm512_mask_cvtpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x float> %0 return _mm512_mask_cvtpd_ps (__W,__U,__A); } __m512d test_mm512_cvtpd_pslo(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvtpd_pslo - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 - // CHECK: zeroinitializer - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_cvtpd_pslo + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <16 x float> %__A to <8 x double> + // APPLE-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %0, <8 x float> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: %shuffle.i = shufflevector <8 x float> %1, <8 x float> zeroinitializer, <16 x i32> + // APPLE-NEXT: %2 = bitcast <16 x float> %shuffle.i to <8 x double> + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_cvtpd_pslo + // X64: entry: + // X64-NEXT: %0 = bitcast <16 x float> %__A to <8 x double> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %0, <8 x float> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: %shuffle.i = shufflevector <8 x float> %1, <8 x float> zeroinitializer, <16 x i32> + // X64-NEXT: %2 = bitcast <16 x float> %shuffle.i to <8 x 
double> + // X64-NEXT: ret <8 x double> %2 return _mm512_cvtpd_pslo(__A); } __m512d test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_pslo - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 - // CHECK: zeroinitializer - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_mask_cvtpd_pslo + // APPLE: entry: + // APPLE-NEXT: %shuffle.i.i = shufflevector <16 x float> %__W, <16 x float> undef, <8 x i32> + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> %shuffle.i.i, i8 %__U, i32 4) #12 + // APPLE-NEXT: %shuffle.i = shufflevector <8 x float> %0, <8 x float> zeroinitializer, <16 x i32> + // APPLE-NEXT: %1 = bitcast <16 x float> %shuffle.i to <8 x double> + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_mask_cvtpd_pslo + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <16 x float> %__W, <16 x float> undef, <8 x i32> + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> %shuffle.i.i, i8 %__U, i32 4) #12 + // X64-NEXT: %shuffle.i = shufflevector <8 x float> %0, <8 x float> zeroinitializer, <16 x i32> + // X64-NEXT: %1 = bitcast <16 x float> %shuffle.i to <8 x double> + // X64-NEXT: ret <8 x double> %1 return _mm512_mask_cvtpd_pslo(__W, __U, __A); } __m256 test_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + // APPLE-LABEL: test_mm512_maskz_cvtpd_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <8 x float> %0 + // X64-LABEL: test_mm512_maskz_cvtpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %__A, <8 x float> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x float> %0 return _mm512_maskz_cvtpd_ps (__U,__A); } __m512 test_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + // APPLE-LABEL: test_mm512_mask_cvtph_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> %__W, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_mask_cvtph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> %__W, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_mask_cvtph_ps (__W,__U,__A); } __m512 test_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + // APPLE-LABEL: test_mm512_maskz_cvtph_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // APPLE-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 x float> zeroinitializer, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_maskz_cvtph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // X64-NEXT: %1 = tail call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %0, <16 
x float> zeroinitializer, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %1 return _mm512_maskz_cvtph_ps (__U,__A); } __m256i test_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvttpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + // APPLE-LABEL: test_mm512_mask_cvttpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvttpd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvttpd_epi32 (__W,__U,__A); } __m256i test_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvttpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvttpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvttpd_epi32 (__U,__A); } __m512i test_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvttps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + // APPLE-LABEL: test_mm512_mask_cvttps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvttps_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvttps_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvttps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvttps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_cvttps_epi32 (__U,__A); } 
__m512i test_mm512_cvtps_epi32 (__m512 __A) { - // CHECK-LABEL: @test_mm512_cvtps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + // APPLE-LABEL: test_mm512_cvtps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4) #12 + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvtps_epi32 (__A); } __m512i test_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + // APPLE-LABEL: test_mm512_mask_cvtps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtps_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return _mm512_mask_cvtps_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvtps_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_cvtps_epi32 (__U,__A); } __m256i test_mm512_cvtpd_epi32 (__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + // APPLE-LABEL: test_mm512_cvtpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvtpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvtpd_epi32 (__A); } __m256i test_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + // APPLE-LABEL: test_mm512_mask_cvtpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> 
@llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtpd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvtpd_epi32 (__W,__U,__A); } __m256i test_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + // APPLE-LABEL: test_mm512_maskz_cvtpd_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvtpd_epi32 (__U,__A); } __m256i test_mm512_cvtpd_epu32 (__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + // APPLE-LABEL: test_mm512_cvtpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_cvtpd_epu32 (__A); } __m256i test_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + // APPLE-LABEL: test_mm512_mask_cvtpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // APPLE-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %__A, <8 x i32> %0, i8 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvtpd_epu32 (__W,__U,__A); } __m256i test_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvtpd_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 
x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %__A, <8 x i32> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvtpd_epu32 (__U,__A); } __m256i test_mm512_mask_cvtps_ph(__m256i src, __mmask16 k, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_cvtps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + // APPLE-LABEL: test_mm512_mask_cvtps_ph + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <4 x i64> %src to <16 x i16> + // APPLE-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a, i32 4, <16 x i16> %0, i16 %k) + // APPLE-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtps_ph + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %src to <16 x i16> + // X64-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a, i32 4, <16 x i16> %0, i16 %k) + // X64-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm512_mask_cvtps_ph(src, k, a,_MM_FROUND_CUR_DIRECTION); } __m256i test_mm512_maskz_cvtps_ph (__mmask16 k, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + // APPLE-LABEL: test_mm512_maskz_cvtps_ph + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a, i32 4, <16 x i16> zeroinitializer, i16 %k) + // APPLE-NEXT: %1 = bitcast <16 x i16> %0 to <4 x i64> + // APPLE-NEXT: ret <4 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtps_ph + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a, i32 4, <16 x i16> zeroinitializer, i16 %k) + // X64-NEXT: %1 = bitcast <16 x i16> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm512_maskz_cvtps_ph( k, a,_MM_FROUND_CUR_DIRECTION); } __m512i test_mm512_cvtps_epu32 ( __m512 __A) { - // CHECK-LABEL: @test_mm512_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + // APPLE-LABEL: test_mm512_cvtps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4) #12 + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_cvtps_epu32(__A); } __m512i test_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + // APPLE-LABEL: test_mm512_mask_cvtps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_mask_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %1 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> %0, i16 %__U, i32 4) #12 + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 return 
_mm512_mask_cvtps_epu32( __W, __U, __A); } __m512i test_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + // APPLE-LABEL: test_mm512_maskz_cvtps_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // APPLE-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 %__U, i32 4) #12 + // X64-NEXT: %1 = bitcast <16 x i32> %0 to <8 x i64> + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_cvtps_epu32( __U, __A); } double test_mm512_cvtsd_f64(__m512d A) { - // CHECK-LABEL: test_mm512_cvtsd_f64 - // CHECK: extractelement <8 x double> %{{.*}}, i32 0 + // APPLE-LABEL: test_mm512_cvtsd_f64 + // APPLE: entry: + // APPLE-NEXT: %vecext.i = extractelement <8 x double> %A, i32 0 + // APPLE-NEXT: ret double %vecext.i + // X64-LABEL: test_mm512_cvtsd_f64 + // X64: entry: + // X64-NEXT: %vecext.i = extractelement <8 x double> %A, i32 0 + // X64-NEXT: ret double %vecext.i return _mm512_cvtsd_f64(A); } float test_mm512_cvtss_f32(__m512 A) { - // CHECK-LABEL: test_mm512_cvtss_f32 - // CHECK: extractelement <16 x float> %{{.*}}, i32 0 + // APPLE-LABEL: test_mm512_cvtss_f32 + // APPLE: entry: + // APPLE-NEXT: %vecext.i = extractelement <16 x float> %A, i32 0 + // APPLE-NEXT: ret float %vecext.i + // X64-LABEL: test_mm512_cvtss_f32 + // X64: entry: + // X64-NEXT: %vecext.i = extractelement <16 x float> %A, i32 0 + // X64-NEXT: ret float %vecext.i return _mm512_cvtss_f32(A); } __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_max_pd - // CHECK: @llvm.x86.avx512.max.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_max_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_max_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_max_pd (__W,__U,__A,__B); } __m512d test_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_max_pd - // CHECK: @llvm.x86.avx512.max.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_max_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_max_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 
4) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_max_pd (__U,__A,__B); } __m512 test_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_max_ps - // CHECK: @llvm.x86.avx512.max.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_max_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_max_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_max_ps (__W,__U,__A,__B); } __m512d test_mm512_mask_max_round_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_mask_max_round_pd - // CHECK: @llvm.x86.avx512.max.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_max_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_max_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_max_round_pd(__W,__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_maskz_max_round_pd(__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_max_round_pd - // CHECK: @llvm.x86.avx512.max.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_max_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_max_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_max_round_pd(__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_max_round_pd(__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_max_round_pd - // CHECK: @llvm.x86.avx512.max.pd.512 + // APPLE-LABEL: test_mm512_max_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> 
%__B, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_max_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_max_round_pd(__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_max_ps - // CHECK: @llvm.x86.avx512.max.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_max_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_max_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_max_ps (__U,__A,__B); } __m512 test_mm512_mask_max_round_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_mask_max_round_ps - // CHECK: @llvm.x86.avx512.max.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_max_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_max_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_max_round_ps(__W,__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_max_round_ps(__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_max_round_ps - // CHECK: @llvm.x86.avx512.max.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_max_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_max_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_max_round_ps(__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_max_round_ps(__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_max_round_ps - // CHECK: @llvm.x86.avx512.max.ps.512 + // APPLE-LABEL: test_mm512_max_round_ps + // 
APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_max_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_max_round_ps(__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_min_pd - // CHECK: @llvm.x86.avx512.min.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_min_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_min_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_min_pd (__W,__U,__A,__B); } __m512d test_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_min_pd - // CHECK: @llvm.x86.avx512.min.pd.512 + // APPLE-LABEL: test_mm512_maskz_min_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_min_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) #12 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_min_pd (__U,__A,__B); } __m512d test_mm512_mask_min_round_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_mask_min_round_pd - // CHECK: @llvm.x86.avx512.min.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_mask_min_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_mask_min_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W + // X64-NEXT: ret <8 x double> %2 return _mm512_mask_min_round_pd(__W,__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_maskz_min_round_pd(__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_min_round_pd - // CHECK: @llvm.x86.avx512.min.pd.512 - // CHECK: select <8 x i1> %{{.*}}, <8 x 
double> %{{.*}}, <8 x double> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_min_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // APPLE-NEXT: ret <8 x double> %2 + // X64-LABEL: test_mm512_maskz_min_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + // X64-NEXT: ret <8 x double> %2 return _mm512_maskz_min_round_pd(__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_min_round_pd( __m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_min_round_pd - // CHECK: @llvm.x86.avx512.min.pd.512 + // APPLE-LABEL: test_mm512_min_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_min_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_min_round_pd(__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_min_ps - // CHECK: @llvm.x86.avx512.min.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_min_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_min_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_min_ps (__W,__U,__A,__B); } __m512 test_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_min_ps - // CHECK: @llvm.x86.avx512.min.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_min_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_min_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) #12 + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_min_ps (__U,__A,__B); } __m512 test_mm512_mask_min_round_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: 
@test_mm512_mask_min_round_ps - // CHECK: @llvm.x86.avx512.min.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_mask_min_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_mask_min_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W + // X64-NEXT: ret <16 x float> %2 return _mm512_mask_min_round_ps(__W,__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_min_round_ps(__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_min_round_ps - // CHECK: @llvm.x86.avx512.min.ps.512 - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_min_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // APPLE-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // APPLE-NEXT: ret <16 x float> %2 + // X64-LABEL: test_mm512_maskz_min_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // X64-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + // X64-NEXT: ret <16 x float> %2 return _mm512_maskz_min_round_ps(__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_min_round_ps(__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_min_round_ps - // CHECK: @llvm.x86.avx512.min.ps.512 + // APPLE-LABEL: test_mm512_min_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_min_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_min_round_ps(__A,__B,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_floor_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + // APPLE-LABEL: test_mm512_mask_floor_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 1, <16 x float> %__W, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_floor_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 1, <16 x float> %__W, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_floor_ps (__W,__U,__A); } __m512d test_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_floor_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 + // APPLE-LABEL: test_mm512_mask_floor_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x 
double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 1, <8 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_floor_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 1, <8 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_floor_pd (__W,__U,__A); } __m512 test_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_ceil_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + // APPLE-LABEL: test_mm512_mask_ceil_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 2, <16 x float> %__W, i16 %__U, i32 4) #12 + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_ceil_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 2, <16 x float> %__W, i16 %__U, i32 4) #12 + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_ceil_ps (__W,__U,__A); } __m512d test_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_ceil_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 + // APPLE-LABEL: test_mm512_mask_ceil_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 2, <8 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_ceil_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 2, <8 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_ceil_pd (__W,__U,__A); } __m512 test_mm512_mask_roundscale_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + // APPLE-LABEL: test_mm512_mask_roundscale_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 1, <16 x float> %__W, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 1, <16 x float> %__W, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_roundscale_ps(__W,__U,__A, 1); } __m512 test_mm512_maskz_roundscale_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + // APPLE-LABEL: test_mm512_maskz_roundscale_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 1, <16 x float> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 1, <16 x float> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_roundscale_ps(__U,__A, 1); } __m512 test_mm512_mask_roundscale_round_ps(__m512 __A,__mmask16 __U,__m512 __C) { - // CHECK-LABEL: @test_mm512_mask_roundscale_round_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + // APPLE-LABEL: test_mm512_mask_roundscale_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = 
tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__C, i32 3, <16 x float> %__A, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_mask_roundscale_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__C, i32 3, <16 x float> %__A, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_mask_roundscale_round_ps(__A,__U,__C,3,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_maskz_roundscale_round_ps(__m512 __A,__mmask16 __U) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_round_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + // APPLE-LABEL: test_mm512_maskz_roundscale_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 3, <16 x float> zeroinitializer, i16 %__U, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_maskz_roundscale_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 3, <16 x float> zeroinitializer, i16 %__U, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_maskz_roundscale_round_ps(__U,__A,3,_MM_FROUND_CUR_DIRECTION); } __m512 test_mm512_roundscale_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_roundscale_round_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + // APPLE-LABEL: test_mm512_roundscale_round_ps + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 3, <16 x float> zeroinitializer, i16 -1, i32 4) + // APPLE-NEXT: ret <16 x float> %0 + // X64-LABEL: test_mm512_roundscale_round_ps + // X64: entry: + // X64-NEXT: %0 = tail call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %__A, i32 3, <16 x float> zeroinitializer, i16 -1, i32 4) + // X64-NEXT: ret <16 x float> %0 return _mm512_roundscale_round_ps(__A,3,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_mask_roundscale_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 + // APPLE-LABEL: test_mm512_mask_roundscale_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 1, <8 x double> %__W, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 1, <8 x double> %__W, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_roundscale_pd(__W,__U,__A, 1); } __m512d test_mm512_maskz_roundscale_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 + // APPLE-LABEL: test_mm512_maskz_roundscale_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 1, <8 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 1, <8 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_roundscale_pd(__U,__A, 1); } __m512d test_mm512_mask_roundscale_round_pd(__m512d __A,__mmask8 __U,__m512d __C) { - // CHECK-LABEL: 
@test_mm512_mask_roundscale_round_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 + // APPLE-LABEL: test_mm512_mask_roundscale_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__C, i32 3, <8 x double> %__A, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_mask_roundscale_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__C, i32 3, <8 x double> %__A, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_mask_roundscale_round_pd(__A,__U,__C,3,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_maskz_roundscale_round_pd(__m512d __A,__mmask8 __U) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_round_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 + // APPLE-LABEL: test_mm512_maskz_roundscale_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 3, <8 x double> zeroinitializer, i8 %__U, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_maskz_roundscale_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 3, <8 x double> zeroinitializer, i8 %__U, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_maskz_roundscale_round_pd(__U,__A,3,_MM_FROUND_CUR_DIRECTION); } __m512d test_mm512_roundscale_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_roundscale_round_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 + // APPLE-LABEL: test_mm512_roundscale_round_pd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 3, <8 x double> zeroinitializer, i8 -1, i32 4) + // APPLE-NEXT: ret <8 x double> %0 + // X64-LABEL: test_mm512_roundscale_round_pd + // X64: entry: + // X64-NEXT: %0 = tail call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %__A, i32 3, <8 x double> zeroinitializer, i8 -1, i32 4) + // X64-NEXT: ret <8 x double> %0 return _mm512_roundscale_round_pd(__A,3,_MM_FROUND_CUR_DIRECTION); } __m512i test_mm512_max_epi32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // APPLE-LABEL: test_mm512_max_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_max_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_max_epi32 (__A,__B); } __m512i test_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 
x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_mask_max_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // APPLE-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %7 + // X64-LABEL: test_mm512_mask_max_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // X64-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // X64-NEXT: ret <8 x i64> %7 return _mm512_mask_max_epi32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_maskz_max_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_max_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp sgt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_max_epi32 (__M,__A,__B); } __m512i test_mm512_max_epi64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // APPLE-LABEL: test_mm512_max_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sgt <8 x i64> %__A, %__B + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <8 x i64> %__A, %__B + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // X64-NEXT: ret <8 x i64> %1 return _mm512_max_epi64 (__A,__B); } __m512i test_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epi64 - // CHECK: [[CMP:%.*]] = 
icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // APPLE-LABEL: test_mm512_mask_max_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sgt <8 x i64> %__A, %__B + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_mask_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <8 x i64> %__A, %__B + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %3 return _mm512_mask_max_epi64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // APPLE-LABEL: test_mm512_maskz_max_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = icmp sgt <8 x i64> %__A, %__B + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <8 x i64> %__A, %__B + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_max_epi64 (__M,__A,__B); } __m512i test_mm512_max_epu64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // APPLE-LABEL: test_mm512_max_epu64 + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ugt <8 x i64> %__A, %__B + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <8 x i64> %__A, %__B + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // X64-NEXT: ret <8 x i64> %1 return _mm512_max_epu64 (__A,__B); } __m512i test_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // APPLE-LABEL: test_mm512_mask_max_epu64 + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ugt <8 x i64> %__A, %__B + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_mask_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <8 x i64> %__A, %__B + // 
X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %3 return _mm512_mask_max_epu64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // APPLE-LABEL: test_mm512_maskz_max_epu64 + // APPLE: entry: + // APPLE-NEXT: %0 = icmp ugt <8 x i64> %__A, %__B + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %3 + // X64-LABEL: test_mm512_maskz_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <8 x i64> %__A, %__B + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %3 return _mm512_maskz_max_epu64 (__M,__A,__B); } __m512i test_mm512_max_epu32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // APPLE-LABEL: test_mm512_max_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_max_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_max_epu32 (__A,__B); } __m512i test_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_mask_max_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // APPLE-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %7 + // X64-LABEL: test_mm512_mask_max_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = 
icmp ugt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // X64-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // X64-NEXT: ret <8 x i64> %7 return _mm512_mask_max_epu32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_maskz_max_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_max_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp ugt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_max_epu32 (__M,__A,__B); } __m512i test_mm512_min_epi32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // APPLE-LABEL: test_mm512_min_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_min_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_min_epi32 (__A,__B); } __m512i test_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_mask_min_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> 
%2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // APPLE-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %7 + // X64-LABEL: test_mm512_mask_min_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // X64-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // X64-NEXT: ret <8 x i64> %7 return _mm512_mask_min_epi32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_maskz_min_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_min_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp slt <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_min_epi32 (__M,__A,__B); } __m512i test_mm512_min_epu32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // APPLE-LABEL: test_mm512_min_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_min_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <16 x i32> %3 to <8 x i64> + // X64-NEXT: ret <8 x i64> %4 return _mm512_min_epu32 (__A,__B); } __m512i test_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_min_epu32 - // 
CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_mask_min_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // APPLE-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %7 + // X64-LABEL: test_mm512_mask_min_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // X64-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // X64-NEXT: ret <8 x i64> %7 return _mm512_mask_min_epu32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // APPLE-LABEL: test_mm512_maskz_min_epu32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // APPLE-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // APPLE-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_min_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = bitcast <8 x i64> %__B to <16 x i32> + // X64-NEXT: %2 = icmp ult <16 x i32> %0, %1 + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + // X64-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_min_epu32 (__M,__A,__B); } __m512i test_mm512_min_epi64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // APPLE-LABEL: test_mm512_min_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = icmp slt <8 x i64> %__A, %__B + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_min_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp slt <8 x i64> %__A, %__B + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B + // X64-NEXT: ret <8 x i64> %1 return 
_mm512_min_epi64 (__A,__B);
}

__m512i test_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
- // CHECK-LABEL: @test_mm512_mask_min_epi64
- // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]]
- // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]]
- // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}}
+ // APPLE-LABEL: test_mm512_mask_min_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = icmp slt <8 x i64> %__A, %__B
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W
+ // APPLE-NEXT: ret <8 x i64> %3
+ // X64-LABEL: test_mm512_mask_min_epi64
+ // X64: entry:
+ // X64-NEXT: %0 = icmp slt <8 x i64> %__A, %__B
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W
+ // X64-NEXT: ret <8 x i64> %3
  return _mm512_mask_min_epi64 (__W,__M,__A,__B);
}

__m512i test_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
- // CHECK-LABEL: @test_mm512_maskz_min_epi64
- // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]]
- // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]]
- // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}}
+ // APPLE-LABEL: test_mm512_maskz_min_epi64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = icmp slt <8 x i64> %__A, %__B
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ // APPLE-NEXT: ret <8 x i64> %3
+ // X64-LABEL: test_mm512_maskz_min_epi64
+ // X64: entry:
+ // X64-NEXT: %0 = icmp slt <8 x i64> %__A, %__B
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ // X64-NEXT: ret <8 x i64> %3
  return _mm512_maskz_min_epi64 (__M,__A,__B);
}

__m512i test_mm512_min_epu64 (__m512i __A, __m512i __B)
{
- // CHECK-LABEL: @test_mm512_min_epu64
- // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]]
- // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]]
+ // APPLE-LABEL: test_mm512_min_epu64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = icmp ult <8 x i64> %__A, %__B
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // APPLE-NEXT: ret <8 x i64> %1
+ // X64-LABEL: test_mm512_min_epu64
+ // X64: entry:
+ // X64-NEXT: %0 = icmp ult <8 x i64> %__A, %__B
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // X64-NEXT: ret <8 x i64> %1
  return _mm512_min_epu64 (__A,__B);
}

__m512i test_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
- // CHECK-LABEL: @test_mm512_mask_min_epu64
- // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]]
- // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]]
- // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}}
+ // APPLE-LABEL: test_mm512_mask_min_epu64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = icmp ult <8 x i64> %__A, %__B
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W
+ // APPLE-NEXT: ret <8 x i64> %3
+ // X64-LABEL: test_mm512_mask_min_epu64
+ // X64: entry:
+ // X64-NEXT: %0 = icmp ult <8 x i64> %__A, %__B
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %__W
+ // X64-NEXT: ret <8 x i64> %3
  return _mm512_mask_min_epu64 (__W,__M,__A,__B);
}

__m512i test_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
- // CHECK-LABEL: @test_mm512_maskz_min_epu64
- // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]]
- // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]]
- // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}}
+ // APPLE-LABEL: test_mm512_maskz_min_epu64
+ // APPLE: entry:
+ // APPLE-NEXT: %0 = icmp ult <8 x i64> %__A, %__B
+ // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // APPLE-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // APPLE-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ // APPLE-NEXT: ret <8 x i64> %3
+ // X64-LABEL: test_mm512_maskz_min_epu64
+ // X64: entry:
+ // X64-NEXT: %0 = icmp ult <8 x i64> %__A, %__B
+ // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %__A, <8 x i64> %__B
+ // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1>
+ // X64-NEXT: %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ // X64-NEXT: ret <8 x i64> %3
  return _mm512_maskz_min_epu64 (__M,__A,__B);
}

__m512i test_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
- // CHECK-LABEL: @test_mm512_mask_set1_epi32
- // CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 2
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 3
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 4
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 5
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 6
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 7
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 8
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 9
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 10
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 11
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 12
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 13
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 14
- // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 15
- // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ // APPLE-LABEL: test_mm512_mask_set1_epi32
+ // APPLE: entry:
+ // APPLE-NEXT: %vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0
+ // APPLE-NEXT: %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ // APPLE-NEXT: %0 = bitcast <8 x i64> %__O to <16 x i32>
+ // APPLE-NEXT: %1 = bitcast i16 %__M to <16 x i1>
+ // APPLE-NEXT: %2 = select <16 x i1> %1, <16 x i32> %vecinit15.i.i, <16 x i32> %0
+ // APPLE-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64>
+ // APPLE-NEXT: ret <8 x i64> %3
+ // X64-LABEL: test_mm512_mask_set1_epi32
+ // X64: entry:
+ // X64-NEXT: %vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0
+ // X64-NEXT: %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x 
i32> undef, <16 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast <8 x i64> %__O to <16 x i32> + // X64-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %2 = select <16 x i1> %1, <16 x i32> %vecinit15.i.i, <16 x i32> %0 + // X64-NEXT: %3 = bitcast <16 x i32> %2 to <8 x i64> + // X64-NEXT: ret <8 x i64> %3 return _mm512_mask_set1_epi32 ( __O, __M, __A); } -__m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A) -{ - // CHECK-LABEL: @test_mm512_maskz_set1_epi32 - // CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 2 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 3 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 4 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 5 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 6 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 7 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 8 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 9 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 10 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 11 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 12 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 13 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 14 - // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 15 - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} - return _mm512_maskz_set1_epi32(__M, __A); +__m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { + // APPLE-LABEL: test_mm512_maskz_set1_epi32 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0 + // APPLE-NEXT: %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // APPLE-NEXT: %1 = select <16 x i1> %0, <16 x i32> %vecinit15.i.i, <16 x i32> zeroinitializer + // APPLE-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %2 + // X64-LABEL: test_mm512_maskz_set1_epi32 + // X64: entry: + // X64-NEXT: %vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0 + // X64-NEXT: %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // X64-NEXT: %1 = select <16 x i1> %0, <16 x i32> %vecinit15.i.i, <16 x i32> zeroinitializer + // X64-NEXT: %2 = bitcast <16 x i32> %1 to <8 x i64> + // X64-NEXT: ret <8 x i64> %2 + return _mm512_maskz_set1_epi32(__M, __A); } - __m512i test_mm512_set_epi8(char e63, char e62, char e61, char e60, char e59, char e58, char e57, char e56, char e55, char e54, char e53, char e52, char e51, char e50, char e49, char e48, char e47, char e46, char e45, @@ -10138,72 +23293,143 @@ char e16, char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) { + // APPLE-LABEL: test_mm512_set_epi8 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <64 x i8> undef, i8 %e0, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <64 x i8> %vecinit.i, i8 %e1, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <64 x i8> %vecinit1.i, i8 %e2, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <64 x i8> %vecinit2.i, i8 %e3, i32 3 + // APPLE-NEXT: %vecinit4.i 
= insertelement <64 x i8> %vecinit3.i, i8 %e4, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <64 x i8> %vecinit4.i, i8 %e5, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <64 x i8> %vecinit5.i, i8 %e6, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <64 x i8> %vecinit6.i, i8 %e7, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <64 x i8> %vecinit7.i, i8 %e8, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <64 x i8> %vecinit8.i, i8 %e9, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <64 x i8> %vecinit9.i, i8 %e10, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <64 x i8> %vecinit10.i, i8 %e11, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <64 x i8> %vecinit11.i, i8 %e12, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <64 x i8> %vecinit12.i, i8 %e13, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <64 x i8> %vecinit13.i, i8 %e14, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <64 x i8> %vecinit14.i, i8 %e15, i32 15 + // APPLE-NEXT: %vecinit16.i = insertelement <64 x i8> %vecinit15.i, i8 %e16, i32 16 + // APPLE-NEXT: %vecinit17.i = insertelement <64 x i8> %vecinit16.i, i8 %e17, i32 17 + // APPLE-NEXT: %vecinit18.i = insertelement <64 x i8> %vecinit17.i, i8 %e18, i32 18 + // APPLE-NEXT: %vecinit19.i = insertelement <64 x i8> %vecinit18.i, i8 %e19, i32 19 + // APPLE-NEXT: %vecinit20.i = insertelement <64 x i8> %vecinit19.i, i8 %e20, i32 20 + // APPLE-NEXT: %vecinit21.i = insertelement <64 x i8> %vecinit20.i, i8 %e21, i32 21 + // APPLE-NEXT: %vecinit22.i = insertelement <64 x i8> %vecinit21.i, i8 %e22, i32 22 + // APPLE-NEXT: %vecinit23.i = insertelement <64 x i8> %vecinit22.i, i8 %e23, i32 23 + // APPLE-NEXT: %vecinit24.i = insertelement <64 x i8> %vecinit23.i, i8 %e24, i32 24 + // APPLE-NEXT: %vecinit25.i = insertelement <64 x i8> %vecinit24.i, i8 %e25, i32 25 + // APPLE-NEXT: %vecinit26.i = insertelement <64 x i8> %vecinit25.i, i8 %e26, i32 26 + // APPLE-NEXT: %vecinit27.i = insertelement <64 x i8> %vecinit26.i, i8 %e27, i32 27 + // APPLE-NEXT: %vecinit28.i = insertelement <64 x i8> %vecinit27.i, i8 %e28, i32 28 + // APPLE-NEXT: %vecinit29.i = insertelement <64 x i8> %vecinit28.i, i8 %e29, i32 29 + // APPLE-NEXT: %vecinit30.i = insertelement <64 x i8> %vecinit29.i, i8 %e30, i32 30 + // APPLE-NEXT: %vecinit31.i = insertelement <64 x i8> %vecinit30.i, i8 %e31, i32 31 + // APPLE-NEXT: %vecinit32.i = insertelement <64 x i8> %vecinit31.i, i8 %e32, i32 32 + // APPLE-NEXT: %vecinit33.i = insertelement <64 x i8> %vecinit32.i, i8 %e33, i32 33 + // APPLE-NEXT: %vecinit34.i = insertelement <64 x i8> %vecinit33.i, i8 %e34, i32 34 + // APPLE-NEXT: %vecinit35.i = insertelement <64 x i8> %vecinit34.i, i8 %e35, i32 35 + // APPLE-NEXT: %vecinit36.i = insertelement <64 x i8> %vecinit35.i, i8 %e36, i32 36 + // APPLE-NEXT: %vecinit37.i = insertelement <64 x i8> %vecinit36.i, i8 %e37, i32 37 + // APPLE-NEXT: %vecinit38.i = insertelement <64 x i8> %vecinit37.i, i8 %e38, i32 38 + // APPLE-NEXT: %vecinit39.i = insertelement <64 x i8> %vecinit38.i, i8 %e39, i32 39 + // APPLE-NEXT: %vecinit40.i = insertelement <64 x i8> %vecinit39.i, i8 %e40, i32 40 + // APPLE-NEXT: %vecinit41.i = insertelement <64 x i8> %vecinit40.i, i8 %e41, i32 41 + // APPLE-NEXT: %vecinit42.i = insertelement <64 x i8> %vecinit41.i, i8 %e42, i32 42 + // APPLE-NEXT: %vecinit43.i = insertelement <64 x i8> %vecinit42.i, i8 %e43, i32 43 + // APPLE-NEXT: %vecinit44.i = insertelement <64 x i8> %vecinit43.i, i8 %e44, i32 44 + // APPLE-NEXT: %vecinit45.i = insertelement <64 x i8> %vecinit44.i, i8 %e45, i32 
45 + // APPLE-NEXT: %vecinit46.i = insertelement <64 x i8> %vecinit45.i, i8 %e46, i32 46 + // APPLE-NEXT: %vecinit47.i = insertelement <64 x i8> %vecinit46.i, i8 %e47, i32 47 + // APPLE-NEXT: %vecinit48.i = insertelement <64 x i8> %vecinit47.i, i8 %e48, i32 48 + // APPLE-NEXT: %vecinit49.i = insertelement <64 x i8> %vecinit48.i, i8 %e49, i32 49 + // APPLE-NEXT: %vecinit50.i = insertelement <64 x i8> %vecinit49.i, i8 %e50, i32 50 + // APPLE-NEXT: %vecinit51.i = insertelement <64 x i8> %vecinit50.i, i8 %e51, i32 51 + // APPLE-NEXT: %vecinit52.i = insertelement <64 x i8> %vecinit51.i, i8 %e52, i32 52 + // APPLE-NEXT: %vecinit53.i = insertelement <64 x i8> %vecinit52.i, i8 %e53, i32 53 + // APPLE-NEXT: %vecinit54.i = insertelement <64 x i8> %vecinit53.i, i8 %e54, i32 54 + // APPLE-NEXT: %vecinit55.i = insertelement <64 x i8> %vecinit54.i, i8 %e55, i32 55 + // APPLE-NEXT: %vecinit56.i = insertelement <64 x i8> %vecinit55.i, i8 %e56, i32 56 + // APPLE-NEXT: %vecinit57.i = insertelement <64 x i8> %vecinit56.i, i8 %e57, i32 57 + // APPLE-NEXT: %vecinit58.i = insertelement <64 x i8> %vecinit57.i, i8 %e58, i32 58 + // APPLE-NEXT: %vecinit59.i = insertelement <64 x i8> %vecinit58.i, i8 %e59, i32 59 + // APPLE-NEXT: %vecinit60.i = insertelement <64 x i8> %vecinit59.i, i8 %e60, i32 60 + // APPLE-NEXT: %vecinit61.i = insertelement <64 x i8> %vecinit60.i, i8 %e61, i32 61 + // APPLE-NEXT: %vecinit62.i = insertelement <64 x i8> %vecinit61.i, i8 %e62, i32 62 + // APPLE-NEXT: %vecinit63.i = insertelement <64 x i8> %vecinit62.i, i8 %e63, i32 63 + // APPLE-NEXT: %0 = bitcast <64 x i8> %vecinit63.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_set_epi8 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <64 x i8> undef, i8 %e0, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <64 x i8> %vecinit.i, i8 %e1, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <64 x i8> %vecinit1.i, i8 %e2, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <64 x i8> %vecinit2.i, i8 %e3, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <64 x i8> %vecinit3.i, i8 %e4, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <64 x i8> %vecinit4.i, i8 %e5, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <64 x i8> %vecinit5.i, i8 %e6, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <64 x i8> %vecinit6.i, i8 %e7, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <64 x i8> %vecinit7.i, i8 %e8, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <64 x i8> %vecinit8.i, i8 %e9, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <64 x i8> %vecinit9.i, i8 %e10, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <64 x i8> %vecinit10.i, i8 %e11, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <64 x i8> %vecinit11.i, i8 %e12, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <64 x i8> %vecinit12.i, i8 %e13, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <64 x i8> %vecinit13.i, i8 %e14, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <64 x i8> %vecinit14.i, i8 %e15, i32 15 + // X64-NEXT: %vecinit16.i = insertelement <64 x i8> %vecinit15.i, i8 %e16, i32 16 + // X64-NEXT: %vecinit17.i = insertelement <64 x i8> %vecinit16.i, i8 %e17, i32 17 + // X64-NEXT: %vecinit18.i = insertelement <64 x i8> %vecinit17.i, i8 %e18, i32 18 + // X64-NEXT: %vecinit19.i = insertelement <64 x i8> %vecinit18.i, i8 %e19, i32 19 + // X64-NEXT: %vecinit20.i = insertelement <64 x i8> %vecinit19.i, i8 %e20, i32 20 + // X64-NEXT: %vecinit21.i = insertelement <64 x i8> %vecinit20.i, i8 %e21, i32 21 + // X64-NEXT: %vecinit22.i = insertelement <64 x 
i8> %vecinit21.i, i8 %e22, i32 22 + // X64-NEXT: %vecinit23.i = insertelement <64 x i8> %vecinit22.i, i8 %e23, i32 23 + // X64-NEXT: %vecinit24.i = insertelement <64 x i8> %vecinit23.i, i8 %e24, i32 24 + // X64-NEXT: %vecinit25.i = insertelement <64 x i8> %vecinit24.i, i8 %e25, i32 25 + // X64-NEXT: %vecinit26.i = insertelement <64 x i8> %vecinit25.i, i8 %e26, i32 26 + // X64-NEXT: %vecinit27.i = insertelement <64 x i8> %vecinit26.i, i8 %e27, i32 27 + // X64-NEXT: %vecinit28.i = insertelement <64 x i8> %vecinit27.i, i8 %e28, i32 28 + // X64-NEXT: %vecinit29.i = insertelement <64 x i8> %vecinit28.i, i8 %e29, i32 29 + // X64-NEXT: %vecinit30.i = insertelement <64 x i8> %vecinit29.i, i8 %e30, i32 30 + // X64-NEXT: %vecinit31.i = insertelement <64 x i8> %vecinit30.i, i8 %e31, i32 31 + // X64-NEXT: %vecinit32.i = insertelement <64 x i8> %vecinit31.i, i8 %e32, i32 32 + // X64-NEXT: %vecinit33.i = insertelement <64 x i8> %vecinit32.i, i8 %e33, i32 33 + // X64-NEXT: %vecinit34.i = insertelement <64 x i8> %vecinit33.i, i8 %e34, i32 34 + // X64-NEXT: %vecinit35.i = insertelement <64 x i8> %vecinit34.i, i8 %e35, i32 35 + // X64-NEXT: %vecinit36.i = insertelement <64 x i8> %vecinit35.i, i8 %e36, i32 36 + // X64-NEXT: %vecinit37.i = insertelement <64 x i8> %vecinit36.i, i8 %e37, i32 37 + // X64-NEXT: %vecinit38.i = insertelement <64 x i8> %vecinit37.i, i8 %e38, i32 38 + // X64-NEXT: %vecinit39.i = insertelement <64 x i8> %vecinit38.i, i8 %e39, i32 39 + // X64-NEXT: %vecinit40.i = insertelement <64 x i8> %vecinit39.i, i8 %e40, i32 40 + // X64-NEXT: %vecinit41.i = insertelement <64 x i8> %vecinit40.i, i8 %e41, i32 41 + // X64-NEXT: %vecinit42.i = insertelement <64 x i8> %vecinit41.i, i8 %e42, i32 42 + // X64-NEXT: %vecinit43.i = insertelement <64 x i8> %vecinit42.i, i8 %e43, i32 43 + // X64-NEXT: %vecinit44.i = insertelement <64 x i8> %vecinit43.i, i8 %e44, i32 44 + // X64-NEXT: %vecinit45.i = insertelement <64 x i8> %vecinit44.i, i8 %e45, i32 45 + // X64-NEXT: %vecinit46.i = insertelement <64 x i8> %vecinit45.i, i8 %e46, i32 46 + // X64-NEXT: %vecinit47.i = insertelement <64 x i8> %vecinit46.i, i8 %e47, i32 47 + // X64-NEXT: %vecinit48.i = insertelement <64 x i8> %vecinit47.i, i8 %e48, i32 48 + // X64-NEXT: %vecinit49.i = insertelement <64 x i8> %vecinit48.i, i8 %e49, i32 49 + // X64-NEXT: %vecinit50.i = insertelement <64 x i8> %vecinit49.i, i8 %e50, i32 50 + // X64-NEXT: %vecinit51.i = insertelement <64 x i8> %vecinit50.i, i8 %e51, i32 51 + // X64-NEXT: %vecinit52.i = insertelement <64 x i8> %vecinit51.i, i8 %e52, i32 52 + // X64-NEXT: %vecinit53.i = insertelement <64 x i8> %vecinit52.i, i8 %e53, i32 53 + // X64-NEXT: %vecinit54.i = insertelement <64 x i8> %vecinit53.i, i8 %e54, i32 54 + // X64-NEXT: %vecinit55.i = insertelement <64 x i8> %vecinit54.i, i8 %e55, i32 55 + // X64-NEXT: %vecinit56.i = insertelement <64 x i8> %vecinit55.i, i8 %e56, i32 56 + // X64-NEXT: %vecinit57.i = insertelement <64 x i8> %vecinit56.i, i8 %e57, i32 57 + // X64-NEXT: %vecinit58.i = insertelement <64 x i8> %vecinit57.i, i8 %e58, i32 58 + // X64-NEXT: %vecinit59.i = insertelement <64 x i8> %vecinit58.i, i8 %e59, i32 59 + // X64-NEXT: %vecinit60.i = insertelement <64 x i8> %vecinit59.i, i8 %e60, i32 60 + // X64-NEXT: %vecinit61.i = insertelement <64 x i8> %vecinit60.i, i8 %e61, i32 61 + // X64-NEXT: %vecinit62.i = insertelement <64 x i8> %vecinit61.i, i8 %e62, i32 62 + // X64-NEXT: %vecinit63.i = insertelement <64 x i8> %vecinit62.i, i8 %e63, i32 63 + // X64-NEXT: %0 = bitcast <64 x i8> %vecinit63.i to <8 x i64> + // 
X64-NEXT: ret <8 x i64> %0 - //CHECK-LABEL: @test_mm512_set_epi8 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 - //CHECK: load i8, i8* %{{.*}}, align 1 return _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48,e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32,e31, e30, e29, e28, e27, e26, @@ -10217,39 +23443,78 @@ short e16, short e15, short e14, short e13, short e12, short e11, short e10, short e9, short e8, short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0) { - //CHECK-LABEL: @test_mm512_set_epi16 - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 - //CHECK: insertelement{{.*}}i32 8 - //CHECK: insertelement{{.*}}i32 9 - //CHECK: insertelement{{.*}}i32 10 - //CHECK: 
insertelement{{.*}}i32 11 - //CHECK: insertelement{{.*}}i32 12 - //CHECK: insertelement{{.*}}i32 13 - //CHECK: insertelement{{.*}}i32 14 - //CHECK: insertelement{{.*}}i32 15 - //CHECK: insertelement{{.*}}i32 16 - //CHECK: insertelement{{.*}}i32 17 - //CHECK: insertelement{{.*}}i32 18 - //CHECK: insertelement{{.*}}i32 19 - //CHECK: insertelement{{.*}}i32 20 - //CHECK: insertelement{{.*}}i32 21 - //CHECK: insertelement{{.*}}i32 22 - //CHECK: insertelement{{.*}}i32 23 - //CHECK: insertelement{{.*}}i32 24 - //CHECK: insertelement{{.*}}i32 25 - //CHECK: insertelement{{.*}}i32 26 - //CHECK: insertelement{{.*}}i32 27 - //CHECK: insertelement{{.*}}i32 28 - //CHECK: insertelement{{.*}}i32 29 - //CHECK: insertelement{{.*}}i32 30 - //CHECK: insertelement{{.*}}i32 31 + // APPLE-LABEL: test_mm512_set_epi16 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <32 x i16> undef, i16 %e0, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <32 x i16> %vecinit.i, i16 %e1, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <32 x i16> %vecinit1.i, i16 %e2, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <32 x i16> %vecinit2.i, i16 %e3, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <32 x i16> %vecinit3.i, i16 %e4, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <32 x i16> %vecinit4.i, i16 %e5, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <32 x i16> %vecinit5.i, i16 %e6, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <32 x i16> %vecinit6.i, i16 %e7, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <32 x i16> %vecinit7.i, i16 %e8, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <32 x i16> %vecinit8.i, i16 %e9, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <32 x i16> %vecinit9.i, i16 %e10, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <32 x i16> %vecinit10.i, i16 %e11, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <32 x i16> %vecinit11.i, i16 %e12, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <32 x i16> %vecinit12.i, i16 %e13, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <32 x i16> %vecinit13.i, i16 %e14, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <32 x i16> %vecinit14.i, i16 %e15, i32 15 + // APPLE-NEXT: %vecinit16.i = insertelement <32 x i16> %vecinit15.i, i16 %e16, i32 16 + // APPLE-NEXT: %vecinit17.i = insertelement <32 x i16> %vecinit16.i, i16 %e17, i32 17 + // APPLE-NEXT: %vecinit18.i = insertelement <32 x i16> %vecinit17.i, i16 %e18, i32 18 + // APPLE-NEXT: %vecinit19.i = insertelement <32 x i16> %vecinit18.i, i16 %e19, i32 19 + // APPLE-NEXT: %vecinit20.i = insertelement <32 x i16> %vecinit19.i, i16 %e20, i32 20 + // APPLE-NEXT: %vecinit21.i = insertelement <32 x i16> %vecinit20.i, i16 %e21, i32 21 + // APPLE-NEXT: %vecinit22.i = insertelement <32 x i16> %vecinit21.i, i16 %e22, i32 22 + // APPLE-NEXT: %vecinit23.i = insertelement <32 x i16> %vecinit22.i, i16 %e23, i32 23 + // APPLE-NEXT: %vecinit24.i = insertelement <32 x i16> %vecinit23.i, i16 %e24, i32 24 + // APPLE-NEXT: %vecinit25.i = insertelement <32 x i16> %vecinit24.i, i16 %e25, i32 25 + // APPLE-NEXT: %vecinit26.i = insertelement <32 x i16> %vecinit25.i, i16 %e26, i32 26 + // APPLE-NEXT: %vecinit27.i = insertelement <32 x i16> %vecinit26.i, i16 %e27, i32 27 + // APPLE-NEXT: %vecinit28.i = insertelement <32 x i16> %vecinit27.i, i16 %e28, i32 28 + // APPLE-NEXT: %vecinit29.i = insertelement <32 x i16> %vecinit28.i, i16 %e29, i32 29 + // APPLE-NEXT: %vecinit30.i = insertelement <32 x i16> %vecinit29.i, i16 %e30, i32 30 + // APPLE-NEXT: %vecinit31.i = insertelement <32 x 
i16> %vecinit30.i, i16 %e31, i32 31 + // APPLE-NEXT: %0 = bitcast <32 x i16> %vecinit31.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_set_epi16 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <32 x i16> undef, i16 %e0, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <32 x i16> %vecinit.i, i16 %e1, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <32 x i16> %vecinit1.i, i16 %e2, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <32 x i16> %vecinit2.i, i16 %e3, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <32 x i16> %vecinit3.i, i16 %e4, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <32 x i16> %vecinit4.i, i16 %e5, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <32 x i16> %vecinit5.i, i16 %e6, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <32 x i16> %vecinit6.i, i16 %e7, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <32 x i16> %vecinit7.i, i16 %e8, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <32 x i16> %vecinit8.i, i16 %e9, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <32 x i16> %vecinit9.i, i16 %e10, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <32 x i16> %vecinit10.i, i16 %e11, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <32 x i16> %vecinit11.i, i16 %e12, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <32 x i16> %vecinit12.i, i16 %e13, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <32 x i16> %vecinit13.i, i16 %e14, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <32 x i16> %vecinit14.i, i16 %e15, i32 15 + // X64-NEXT: %vecinit16.i = insertelement <32 x i16> %vecinit15.i, i16 %e16, i32 16 + // X64-NEXT: %vecinit17.i = insertelement <32 x i16> %vecinit16.i, i16 %e17, i32 17 + // X64-NEXT: %vecinit18.i = insertelement <32 x i16> %vecinit17.i, i16 %e18, i32 18 + // X64-NEXT: %vecinit19.i = insertelement <32 x i16> %vecinit18.i, i16 %e19, i32 19 + // X64-NEXT: %vecinit20.i = insertelement <32 x i16> %vecinit19.i, i16 %e20, i32 20 + // X64-NEXT: %vecinit21.i = insertelement <32 x i16> %vecinit20.i, i16 %e21, i32 21 + // X64-NEXT: %vecinit22.i = insertelement <32 x i16> %vecinit21.i, i16 %e22, i32 22 + // X64-NEXT: %vecinit23.i = insertelement <32 x i16> %vecinit22.i, i16 %e23, i32 23 + // X64-NEXT: %vecinit24.i = insertelement <32 x i16> %vecinit23.i, i16 %e24, i32 24 + // X64-NEXT: %vecinit25.i = insertelement <32 x i16> %vecinit24.i, i16 %e25, i32 25 + // X64-NEXT: %vecinit26.i = insertelement <32 x i16> %vecinit25.i, i16 %e26, i32 26 + // X64-NEXT: %vecinit27.i = insertelement <32 x i16> %vecinit26.i, i16 %e27, i32 27 + // X64-NEXT: %vecinit28.i = insertelement <32 x i16> %vecinit27.i, i16 %e28, i32 28 + // X64-NEXT: %vecinit29.i = insertelement <32 x i16> %vecinit28.i, i16 %e29, i32 29 + // X64-NEXT: %vecinit30.i = insertelement <32 x i16> %vecinit29.i, i16 %e30, i32 30 + // X64-NEXT: %vecinit31.i = insertelement <32 x i16> %vecinit30.i, i16 %e31, i32 31 + // X64-NEXT: %0 = bitcast <32 x i16> %vecinit31.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 return _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); @@ -10260,25 +23525,48 @@ int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) { - //CHECK-LABEL: @test_mm512_set_epi32 - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 
6 - //CHECK: insertelement{{.*}}i32 7 - //CHECK: insertelement{{.*}}i32 8 - //CHECK: insertelement{{.*}}i32 9 - //CHECK: insertelement{{.*}}i32 10 - //CHECK: insertelement{{.*}}i32 11 - //CHECK: insertelement{{.*}}i32 12 - //CHECK: insertelement{{.*}}i32 13 - //CHECK: insertelement{{.*}}i32 14 - //CHECK: insertelement{{.*}}i32 15 - return _mm512_set_epi32( __A, __B, __C, __D,__E, __F, __G, __H, - __I, __J, __K, __L,__M, __N, __O, __P); + // APPLE-LABEL: test_mm512_set_epi32 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %__P, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %__O, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %__N, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %__M, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %__L, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %__K, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %__J, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %__I, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %__H, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %__G, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %__F, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %__E, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %__D, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %__C, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %__B, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %__A, i32 15 + // APPLE-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_set_epi32 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %__P, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %__O, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %__N, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %__M, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %__L, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %__K, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %__J, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %__I, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %__H, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %__G, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %__F, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %__E, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %__D, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %__C, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %__B, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %__A, i32 15 + // X64-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 + return _mm512_set_epi32(__A, __B, __C, __D, __E, __F, __G, __H, + __I, __J, __K, __L, __M, __N, 
__O, __P); } __m512i test_mm512_setr_epi32 (int __A, int __B, int __C, int __D, @@ -10286,70 +23574,85 @@ int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) { - //CHECK-LABEL: @test_mm512_setr_epi32 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 - //CHECK: insertelement{{.*}}i32 8 - //CHECK: insertelement{{.*}}i32 9 - //CHECK: insertelement{{.*}}i32 10 - //CHECK: insertelement{{.*}}i32 11 - //CHECK: insertelement{{.*}}i32 12 - //CHECK: insertelement{{.*}}i32 13 - //CHECK: insertelement{{.*}}i32 14 - //CHECK: insertelement{{.*}}i32 15 - return _mm512_setr_epi32( __A, __B, __C, __D,__E, __F, __G, __H, - __I, __J, __K, __L,__M, __N, __O, __P); + // APPLE-LABEL: test_mm512_setr_epi32 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %__A, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %__B, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %__C, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %__D, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %__E, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %__F, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %__G, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %__H, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %__I, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %__J, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %__K, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %__L, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %__M, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %__N, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %__O, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %__P, i32 15 + // APPLE-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %0 + // X64-LABEL: test_mm512_setr_epi32 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x i32> undef, i32 %__A, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %__B, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %__C, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %__D, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %__E, i32 4 + // X64-NEXT: %vecinit5.i = insertelement 
<16 x i32> %vecinit4.i, i32 %__F, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %__G, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %__H, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %__I, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %__J, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %__K, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %__L, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %__M, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %__N, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %__O, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %__P, i32 15 + // X64-NEXT: %0 = bitcast <16 x i32> %vecinit15.i to <8 x i64> + // X64-NEXT: ret <8 x i64> %0 + return _mm512_setr_epi32(__A, __B, __C, __D, __E, __F, __G, __H, + __I, __J, __K, __L, __M, __N, __O, __P); } __m512i test_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm512_mask_set1_epi64 - // CHECK: insertelement <8 x i64> undef, i64 %{{.*}}, i32 0 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 1 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 2 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 3 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 4 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 5 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 6 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 7 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_set1_epi64 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0 + // APPLE-NEXT: %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> %__O + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_mask_set1_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0 + // X64-NEXT: %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> %__O + // X64-NEXT: ret <8 x i64> %1 return _mm512_mask_set1_epi64 (__O, __M, __A); } __m512i test_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm512_maskz_set1_epi64 - // CHECK: insertelement <8 x i64> undef, i64 %{{.*}}, i32 0 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 1 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 2 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 3 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 4 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 5 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 6 - // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 7 - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_set1_epi64 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0 + // APPLE-NEXT: 
%vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer + // APPLE-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // APPLE-NEXT: %1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %1 + // X64-LABEL: test_mm512_maskz_set1_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0 + // X64-NEXT: %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %1 return _mm512_maskz_set1_epi64 (__M, __A); } @@ -10358,15 +23661,28 @@ long long __D, long long __E, long long __F, long long __G, long long __H) { - //CHECK-LABEL: @test_mm512_set_epi64 - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 + // APPLE-LABEL: test_mm512_set_epi64 + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %__H, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %__G, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %__F, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %__E, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %__D, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %__C, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %__B, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %__A, i32 7 + // APPLE-NEXT: ret <8 x i64> %vecinit7.i + // X64-LABEL: test_mm512_set_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %__H, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %__G, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %__F, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %__E, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %__D, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %__C, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %__B, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %__A, i32 7 + // X64-NEXT: ret <8 x i64> %vecinit7.i return _mm512_set_epi64(__A, __B, __C, __D, __E, __F, __G, __H ); } @@ -10374,61 +23690,84 @@ long long __D, long long __E, long long __F, long long __G, long long __H) { - //CHECK-LABEL: @test_mm512_setr_epi64 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 + // APPLE-LABEL: test_mm512_setr_epi64 + // APPLE: entry: + // 
APPLE-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %__A, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %__B, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %__C, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %__D, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %__E, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %__F, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %__G, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %__H, i32 7 + // APPLE-NEXT: ret <8 x i64> %vecinit7.i + // X64-LABEL: test_mm512_setr_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x i64> undef, i64 %__A, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %__B, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %__C, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %__D, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %__E, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %__F, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %__G, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %__H, i32 7 + // X64-NEXT: ret <8 x i64> %vecinit7.i return _mm512_setr_epi64(__A, __B, __C, __D, __E, __F, __G, __H ); } __m512d test_mm512_set_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) { - //CHECK-LABEL: @test_mm512_set_pd - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 + // APPLE-LABEL: test_mm512_set_pd + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x double> undef, double %__H, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %__G, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %__F, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %__E, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %__D, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %__C, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %__B, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %__A, i32 7 + // APPLE-NEXT: ret <8 x double> %vecinit7.i + // X64-LABEL: test_mm512_set_pd + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x double> undef, double %__H, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %__G, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %__F, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %__E, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %__D, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %__C, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %__B, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %__A, i32 7 + // X64-NEXT: ret <8 x 
double> %vecinit7.i return _mm512_set_pd( __A, __B, __C, __D, __E, __F, __G, __H); } __m512d test_mm512_setr_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) { - //CHECK-LABEL: @test_mm512_setr_pd - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: load{{.*}}%{{.*}}, align 8 - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 + // APPLE-LABEL: test_mm512_setr_pd + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <8 x double> undef, double %__A, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %__B, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %__C, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %__D, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %__E, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %__F, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %__G, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %__H, i32 7 + // APPLE-NEXT: ret <8 x double> %vecinit7.i + // X64-LABEL: test_mm512_setr_pd + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <8 x double> undef, double %__A, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <8 x double> %vecinit.i, double %__B, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %__C, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %__D, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %__E, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %__F, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %__G, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %__H, i32 7 + // X64-NEXT: ret <8 x double> %vecinit7.i return _mm512_setr_pd( __A, __B, __C, __D, __E, __F, __G, __H); } @@ -10437,64 +23776,139 @@ float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P) { - //CHECK-LABEL: @test_mm512_set_ps - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 - //CHECK: insertelement{{.*}}i32 8 - //CHECK: insertelement{{.*}}i32 9 - //CHECK: insertelement{{.*}}i32 10 - //CHECK: insertelement{{.*}}i32 11 - //CHECK: insertelement{{.*}}i32 12 - //CHECK: insertelement{{.*}}i32 13 - //CHECK: insertelement{{.*}}i32 14 - //CHECK: insertelement{{.*}}i32 15 - return _mm512_set_ps( __A, __B, __C, __D, __E, __F, __G, __H, - __I, __J, __K, __L, __M, __N, __O, __P); + // APPLE-LABEL: test_mm512_set_ps + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x float> undef, float %__P, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x float> 
%vecinit.i, float %__O, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %__N, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %__M, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %__L, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %__K, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %__J, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %__I, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %__H, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %__G, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %__F, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %__E, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %__D, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %__C, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %__B, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %__A, i32 15 + // APPLE-NEXT: ret <16 x float> %vecinit15.i + // X64-LABEL: test_mm512_set_ps + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x float> undef, float %__P, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x float> %vecinit.i, float %__O, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %__N, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %__M, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %__L, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %__K, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %__J, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %__I, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %__H, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %__G, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %__F, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %__E, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %__D, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %__C, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %__B, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %__A, i32 15 + // X64-NEXT: ret <16 x float> %vecinit15.i + return _mm512_set_ps(__A, __B, __C, __D, __E, __F, __G, __H, + __I, __J, __K, __L, __M, __N, __O, __P); } __m512i test_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <8 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[A]], <8 x i64> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[SEL]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_abs_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = sub <8 x i64> zeroinitializer, %__A + // APPLE-NEXT: %1 = icmp slt <8 x i64> %__A, zeroinitializer + // 
APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // APPLE-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %__W + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_mask_abs_epi64 + // X64: entry: + // X64-NEXT: %0 = sub <8 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <8 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %__W + // X64-NEXT: ret <8 x i64> %4 return _mm512_mask_abs_epi64 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <8 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[A]], <8 x i64> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[SEL]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_abs_epi64 + // APPLE: entry: + // APPLE-NEXT: %0 = sub <8 x i64> zeroinitializer, %__A + // APPLE-NEXT: %1 = icmp slt <8 x i64> %__A, zeroinitializer + // APPLE-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // APPLE-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer + // APPLE-NEXT: ret <8 x i64> %4 + // X64-LABEL: test_mm512_maskz_abs_epi64 + // X64: entry: + // X64-NEXT: %0 = sub <8 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <8 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer + // X64-NEXT: ret <8 x i64> %4 return _mm512_maskz_abs_epi64 (__U,__A); } __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_abs_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = sub <16 x i32> zeroinitializer, %0 + // APPLE-NEXT: %2 = icmp slt <16 x i32> %0, zeroinitializer + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %0 + // APPLE-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // APPLE-NEXT: %5 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // APPLE-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %7 + // X64-LABEL: test_mm512_mask_abs_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = sub <16 x i32> zeroinitializer, %0 + // X64-NEXT: %2 = icmp slt <16 x i32> %0, zeroinitializer + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %0 + // X64-NEXT: %4 = bitcast <8 x i64> %__W to <16 x i32> + // X64-NEXT: %5 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 + // X64-NEXT: %7 = bitcast <16 x i32> %6 to <8 x i64> + // X64-NEXT: ret <8 x i64> %7 return _mm512_mask_abs_epi32 
(__W,__U,__A); } __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_maskz_abs_epi32 + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // APPLE-NEXT: %1 = sub <16 x i32> zeroinitializer, %0 + // APPLE-NEXT: %2 = icmp slt <16 x i32> %0, zeroinitializer + // APPLE-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %0 + // APPLE-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // APPLE-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // APPLE-NEXT: ret <8 x i64> %6 + // X64-LABEL: test_mm512_maskz_abs_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x i64> %__A to <16 x i32> + // X64-NEXT: %1 = sub <16 x i32> zeroinitializer, %0 + // X64-NEXT: %2 = icmp slt <16 x i32> %0, zeroinitializer + // X64-NEXT: %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %0 + // X64-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <16 x i32> %5 to <8 x i64> + // X64-NEXT: ret <8 x i64> %6 return _mm512_maskz_abs_epi32 (__U,__A); } @@ -10503,341 +23917,648 @@ float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P) { - //CHECK-LABEL: @test_mm512_setr_ps - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: load{{.*}}%{{.*}}, align 4 - //CHECK: insertelement{{.*}}i32 0 - //CHECK: insertelement{{.*}}i32 1 - //CHECK: insertelement{{.*}}i32 2 - //CHECK: insertelement{{.*}}i32 3 - //CHECK: insertelement{{.*}}i32 4 - //CHECK: insertelement{{.*}}i32 5 - //CHECK: insertelement{{.*}}i32 6 - //CHECK: insertelement{{.*}}i32 7 - //CHECK: insertelement{{.*}}i32 8 - //CHECK: insertelement{{.*}}i32 9 - //CHECK: insertelement{{.*}}i32 10 - //CHECK: insertelement{{.*}}i32 11 - //CHECK: insertelement{{.*}}i32 12 - //CHECK: insertelement{{.*}}i32 13 - //CHECK: insertelement{{.*}}i32 14 - //CHECK: insertelement{{.*}}i32 15 - return _mm512_setr_ps( __A, __B, __C, __D, __E, __F, __G, __H, - __I, __J, __K, __L, __M, __N, __O, __P); + // APPLE-LABEL: test_mm512_setr_ps + // APPLE: entry: + // APPLE-NEXT: %vecinit.i = insertelement <16 x float> undef, float %__A, i32 0 + // APPLE-NEXT: %vecinit1.i = insertelement <16 x float> %vecinit.i, float %__B, i32 1 + // APPLE-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %__C, i32 2 + // APPLE-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %__D, i32 3 + // APPLE-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %__E, i32 4 + // APPLE-NEXT: %vecinit5.i = insertelement <16 x float> 
%vecinit4.i, float %__F, i32 5 + // APPLE-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %__G, i32 6 + // APPLE-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %__H, i32 7 + // APPLE-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %__I, i32 8 + // APPLE-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %__J, i32 9 + // APPLE-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %__K, i32 10 + // APPLE-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %__L, i32 11 + // APPLE-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %__M, i32 12 + // APPLE-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %__N, i32 13 + // APPLE-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %__O, i32 14 + // APPLE-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %__P, i32 15 + // APPLE-NEXT: ret <16 x float> %vecinit15.i + // X64-LABEL: test_mm512_setr_ps + // X64: entry: + // X64-NEXT: %vecinit.i = insertelement <16 x float> undef, float %__A, i32 0 + // X64-NEXT: %vecinit1.i = insertelement <16 x float> %vecinit.i, float %__B, i32 1 + // X64-NEXT: %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %__C, i32 2 + // X64-NEXT: %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %__D, i32 3 + // X64-NEXT: %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %__E, i32 4 + // X64-NEXT: %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %__F, i32 5 + // X64-NEXT: %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %__G, i32 6 + // X64-NEXT: %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %__H, i32 7 + // X64-NEXT: %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %__I, i32 8 + // X64-NEXT: %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %__J, i32 9 + // X64-NEXT: %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %__K, i32 10 + // X64-NEXT: %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %__L, i32 11 + // X64-NEXT: %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %__M, i32 12 + // X64-NEXT: %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %__N, i32 13 + // X64-NEXT: %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %__O, i32 14 + // X64-NEXT: %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %__P, i32 15 + // X64-NEXT: ret <16 x float> %vecinit15.i + return _mm512_setr_ps(__A, __B, __C, __D, __E, __F, __G, __H, + __I, __J, __K, __L, __M, __N, __O, __P); } int test_mm_cvtss_i32(__m128 A) { - // CHECK-LABEL: test_mm_cvtss_i32 - // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}}) + // APPLE-LABEL: test_mm_cvtss_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %A) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtss_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %A) #12 + // X64-NEXT: ret i32 %0 return _mm_cvtss_i32(A); } #ifdef __x86_64__ long long test_mm_cvtss_i64(__m128 A) { - // CHECK-LABEL: test_mm_cvtss_i64 - // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}}) + // APPLE-LABEL: test_mm_cvtss_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %A) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtss_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %A) #12 + // X64-NEXT: ret i64 %0 
return _mm_cvtss_i64(A); } #endif __m128d test_mm_cvti32_sd(__m128d A, int B) { - // CHECK-LABEL: test_mm_cvti32_sd - // CHECK: sitofp i32 %{{.*}} to double - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvti32_sd + // APPLE: entry: + // APPLE-NEXT: %conv.i = sitofp i32 %B to double + // APPLE-NEXT: %vecins.i = insertelement <2 x double> %A, double %conv.i, i32 0 + // APPLE-NEXT: ret <2 x double> %vecins.i + // X64-LABEL: test_mm_cvti32_sd + // X64: entry: + // X64-NEXT: %conv.i = sitofp i32 %B to double + // X64-NEXT: %vecins.i = insertelement <2 x double> %A, double %conv.i, i32 0 + // X64-NEXT: ret <2 x double> %vecins.i return _mm_cvti32_sd(A, B); } #ifdef __x86_64__ __m128d test_mm_cvti64_sd(__m128d A, long long B) { - // CHECK-LABEL: test_mm_cvti64_sd - // CHECK: sitofp i64 %{{.*}} to double - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvti64_sd + // APPLE: entry: + // APPLE-NEXT: %conv.i = sitofp i64 %B to double + // APPLE-NEXT: %vecins.i = insertelement <2 x double> %A, double %conv.i, i32 0 + // APPLE-NEXT: ret <2 x double> %vecins.i + // X64-LABEL: test_mm_cvti64_sd + // X64: entry: + // X64-NEXT: %conv.i = sitofp i64 %B to double + // X64-NEXT: %vecins.i = insertelement <2 x double> %A, double %conv.i, i32 0 + // X64-NEXT: ret <2 x double> %vecins.i return _mm_cvti64_sd(A, B); } #endif __m128 test_mm_cvti32_ss(__m128 A, int B) { - // CHECK-LABEL: test_mm_cvti32_ss - // CHECK: sitofp i32 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvti32_ss + // APPLE: entry: + // APPLE-NEXT: %conv.i = sitofp i32 %B to float + // APPLE-NEXT: %vecins.i = insertelement <4 x float> %A, float %conv.i, i32 0 + // APPLE-NEXT: ret <4 x float> %vecins.i + // X64-LABEL: test_mm_cvti32_ss + // X64: entry: + // X64-NEXT: %conv.i = sitofp i32 %B to float + // X64-NEXT: %vecins.i = insertelement <4 x float> %A, float %conv.i, i32 0 + // X64-NEXT: ret <4 x float> %vecins.i return _mm_cvti32_ss(A, B); } #ifdef __x86_64__ __m128 test_mm_cvti64_ss(__m128 A, long long B) { - // CHECK-LABEL: test_mm_cvti64_ss - // CHECK: sitofp i64 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // APPLE-LABEL: test_mm_cvti64_ss + // APPLE: entry: + // APPLE-NEXT: %conv.i = sitofp i64 %B to float + // APPLE-NEXT: %vecins.i = insertelement <4 x float> %A, float %conv.i, i32 0 + // APPLE-NEXT: ret <4 x float> %vecins.i + // X64-LABEL: test_mm_cvti64_ss + // X64: entry: + // X64-NEXT: %conv.i = sitofp i64 %B to float + // X64-NEXT: %vecins.i = insertelement <4 x float> %A, float %conv.i, i32 0 + // X64-NEXT: ret <4 x float> %vecins.i return _mm_cvti64_ss(A, B); } #endif int test_mm_cvtsd_i32(__m128d A) { - // CHECK-LABEL: test_mm_cvtsd_i32 - // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}}) + // APPLE-LABEL: test_mm_cvtsd_i32 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %A) #12 + // APPLE-NEXT: ret i32 %0 + // X64-LABEL: test_mm_cvtsd_i32 + // X64: entry: + // X64-NEXT: %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %A) #12 + // X64-NEXT: ret i32 %0 return _mm_cvtsd_i32(A); } #ifdef __x86_64__ long long test_mm_cvtsd_i64(__m128d A) { - // CHECK-LABEL: test_mm_cvtsd_i64 - // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}}) + // APPLE-LABEL: test_mm_cvtsd_i64 + // APPLE: entry: + // APPLE-NEXT: %0 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x 
double> %A) #12 + // APPLE-NEXT: ret i64 %0 + // X64-LABEL: test_mm_cvtsd_i64 + // X64: entry: + // X64-NEXT: %0 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %A) #12 + // X64-NEXT: ret i64 %0 return _mm_cvtsd_i64(A); } #endif __m128d test_mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_cvtss_sd - // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round + // APPLE-LABEL: test_mm_mask_cvtss_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_mask_cvtss_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_cvtss_sd(__W, __U, __A, __B); } __m128d test_mm_maskz_cvtss_sd( __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_cvtss_sd - // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round + // APPLE-LABEL: test_mm_maskz_cvtss_sd + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <2 x double> %0 + // X64-LABEL: test_mm_maskz_cvtss_sd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %__A, <4 x float> %__B, <2 x double> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_cvtss_sd( __U, __A, __B); } __m128 test_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_cvtsd_ss - // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round + // APPLE-LABEL: test_mm_mask_cvtsd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_mask_cvtsd_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> %__W, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_cvtsd_ss(__W, __U, __A, __B); } __m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_cvtsd_ss - // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round + // APPLE-LABEL: test_mm_maskz_cvtsd_ss + // APPLE: entry: + // APPLE-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) #12 + // APPLE-NEXT: ret <4 x float> %0 + // X64-LABEL: test_mm_maskz_cvtsd_ss + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %__A, <2 x double> %__B, <4 x float> zeroinitializer, i8 %__U, i32 4) #12 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_cvtsd_ss(__U, __A, __B); } __m512i test_mm512_setzero_epi32() { - // CHECK-LABEL: @test_mm512_setzero_epi32 - // CHECK: zeroinitializer + // APPLE-LABEL: test_mm512_setzero_epi32 + // APPLE: entry: + // APPLE-NEXT: ret <8 x i64> zeroinitializer + // X64-LABEL: test_mm512_setzero_epi32 + // X64: entry: + // X64-NEXT: ret <8 x i64> zeroinitializer return _mm512_setzero_epi32(); } __m512i test_mm512_setzero() { - // CHECK-LABEL: @test_mm512_setzero - 
// CHECK: zeroinitializer + // APPLE-LABEL: test_mm512_setzero + // APPLE: entry: + // APPLE-NEXT: ret <8 x i64> zeroinitializer + // X64-LABEL: test_mm512_setzero + // X64: entry: + // X64-NEXT: ret <8 x i64> zeroinitializer return _mm512_setzero(); } __m512i test_mm512_setzero_si512() { - // CHECK-LABEL: @test_mm512_setzero_si512 - // CHECK: zeroinitializer + // APPLE-LABEL: test_mm512_setzero_si512 + // APPLE: entry: + // APPLE-NEXT: ret <8 x i64> zeroinitializer + // X64-LABEL: test_mm512_setzero_si512 + // X64: entry: + // X64-NEXT: ret <8 x i64> zeroinitializer return _mm512_setzero_si512(); } __m512i test_mm512_setzero_ps() { - // CHECK-LABEL: @test_mm512_setzero_ps - // CHECK: zeroinitializer + // APPLE-LABEL: test_mm512_setzero_ps + // APPLE: entry: + // APPLE-NEXT: ret <8 x i64> zeroinitializer + // X64-LABEL: test_mm512_setzero_ps + // X64: entry: + // X64-NEXT: ret <8 x i64> zeroinitializer return _mm512_setzero_ps(); } __m512d test_mm512_setzero_pd() { - // CHECK-LABEL: @test_mm512_setzero_pd - // CHECK: zeroinitializer + // APPLE-LABEL: test_mm512_setzero_pd + // APPLE: entry: + // APPLE-NEXT: ret <8 x double> zeroinitializer + // X64-LABEL: test_mm512_setzero_pd + // X64: entry: + // X64-NEXT: ret <8 x double> zeroinitializer return _mm512_setzero_pd(); } __mmask16 test_mm512_int2mask(int __a) { - // CHECK-LABEL: test_mm512_int2mask - // CHECK: trunc i32 %{{.*}} to i16 + // APPLE-LABEL: test_mm512_int2mask + // APPLE: entry: + // APPLE-NEXT: %conv.i = trunc i32 %__a to i16 + // APPLE-NEXT: ret i16 %conv.i + // X64-LABEL: test_mm512_int2mask + // X64: entry: + // X64-NEXT: %conv.i = trunc i32 %__a to i16 + // X64-NEXT: ret i16 %conv.i return _mm512_int2mask(__a); } int test_mm512_mask2int(__mmask16 __a) { - // CHECK-LABEL: test_mm512_mask2int - // CHECK: zext i16 %{{.*}} to i32 + // APPLE-LABEL: test_mm512_mask2int + // APPLE: entry: + // APPLE-NEXT: %conv.i = zext i16 %__a to i32 + // APPLE-NEXT: ret i32 %conv.i + // X64-LABEL: test_mm512_mask2int + // X64: entry: + // X64-NEXT: %conv.i = zext i16 %__a to i32 + // X64-NEXT: ret i32 %conv.i return _mm512_mask2int(__a); } __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_move_ss - // CHECK: [[EXT:%.*]] = extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: insertelement <4 x float> %{{.*}}, float [[EXT]], i32 0 - // CHECK: [[A:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0 - // CHECK-NEXT: [[B:%.*]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, float [[A]], float [[B]] - // CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_move_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %1 = extractelement <4 x float> %__W, i64 0 + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // APPLE-NEXT: %4 = select i1 %3, float %0, float %1 + // APPLE-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // APPLE-NEXT: ret <4 x float> %5 + // X64-LABEL: test_mm_mask_move_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %1 = extractelement <4 x float> %__W, i64 0 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // X64-NEXT: %4 = select i1 %3, float %0, float %1 + // 
X64-NEXT: %5 = insertelement <4 x float> %__A, float %4, i64 0 + // X64-NEXT: ret <4 x float> %5 return _mm_mask_move_ss ( __W, __U, __A, __B); } __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_move_ss - // CHECK: [[EXT:%.*]] = extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: insertelement <4 x float> %{{.*}}, float [[EXT]], i32 0 - // CHECK: [[A:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0 - // CHECK-NEXT: [[B:%.*]] = extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, float [[A]], float [[B]] - // CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_move_ss + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <4 x float> %__B, i32 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, float %0, float 0.000000e+00 + // APPLE-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // APPLE-NEXT: ret <4 x float> %4 + // X64-LABEL: test_mm_maskz_move_ss + // X64: entry: + // X64-NEXT: %0 = extractelement <4 x float> %__B, i32 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, float %0, float 0.000000e+00 + // X64-NEXT: %4 = insertelement <4 x float> %__A, float %3, i64 0 + // X64-NEXT: ret <4 x float> %4 return _mm_maskz_move_ss (__U, __A, __B); } __m128d test_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_move_sd - // CHECK: [[EXT:%.*]] = extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: insertelement <2 x double> %{{.*}}, double [[EXT]], i32 0 - // CHECK: [[A:%.*]] = extractelement <2 x double> [[VEC:%.*]], i64 0 - // CHECK-NEXT: [[B:%.*]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, double [[A]], double [[B]] - // CHECK-NEXT: insertelement <2 x double> [[VEC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_mask_move_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %1 = extractelement <2 x double> %__W, i64 0 + // APPLE-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // APPLE-NEXT: %4 = select i1 %3, double %0, double %1 + // APPLE-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // APPLE-NEXT: ret <2 x double> %5 + // X64-LABEL: test_mm_mask_move_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %1 = extractelement <2 x double> %__W, i64 0 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = extractelement <8 x i1> %2, i64 0 + // X64-NEXT: %4 = select i1 %3, double %0, double %1 + // X64-NEXT: %5 = insertelement <2 x double> %__A, double %4, i64 0 + // X64-NEXT: ret <2 x double> %5 return _mm_mask_move_sd ( __W, __U, __A, __B); } __m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_move_sd - // CHECK: [[EXT:%.*]] = extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: insertelement <2 x double> %{{.*}}, double [[EXT]], i32 0 - // CHECK: [[A:%.*]] = extractelement <2 x double> [[VEC:%.*]], i64 0 - // CHECK-NEXT: 
[[B:%.*]] = extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> - // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 - // CHECK-NEXT: [[SEL:%.*]] = select i1 %13, double [[A]], double [[B]] - // CHECK-NEXT: insertelement <2 x double> [[VEC]], double [[SEL]], i64 0 + // APPLE-LABEL: test_mm_maskz_move_sd + // APPLE: entry: + // APPLE-NEXT: %0 = extractelement <2 x double> %__B, i32 0 + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // APPLE-NEXT: %3 = select i1 %2, double %0, double 0.000000e+00 + // APPLE-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // APPLE-NEXT: ret <2 x double> %4 + // X64-LABEL: test_mm_maskz_move_sd + // X64: entry: + // X64-NEXT: %0 = extractelement <2 x double> %__B, i32 0 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = extractelement <8 x i1> %1, i64 0 + // X64-NEXT: %3 = select i1 %2, double %0, double 0.000000e+00 + // X64-NEXT: %4 = insertelement <2 x double> %__A, double %3, i64 0 + // X64-NEXT: ret <2 x double> %4 return _mm_maskz_move_sd (__U, __A, __B); } void test_mm_mask_store_ss(float * __P, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_store_ss - // CHECK: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %{{.*}}, <4 x float>* %{{.*}}, i32 1, <4 x i1> %{{.*}}) + // APPLE-LABEL: test_mm_mask_store_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast float* %__P to <4 x float>* + // APPLE-NEXT: %1 = and i8 %__U, 1 + // APPLE-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // APPLE-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %__A, <4 x float>* %0, i32 1, <4 x i1> %extract.i) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm_mask_store_ss + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__P to <4 x float>* + // X64-NEXT: %1 = and i8 %__U, 1 + // X64-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %__A, <4 x float>* %0, i32 1, <4 x i1> %extract.i) #12 + // X64-NEXT: ret void _mm_mask_store_ss(__P, __U, __A); } void test_mm_mask_store_sd(double * __P, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_store_sd - // CHECK: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %{{.*}}, <2 x double>* %{{.*}}, i32 1, <2 x i1> %{{.*}}) + // APPLE-LABEL: test_mm_mask_store_sd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast double* %__P to <2 x double>* + // APPLE-NEXT: %1 = and i8 %__U, 1 + // APPLE-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // APPLE-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // APPLE-NEXT: tail call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %__A, <2 x double>* %0, i32 1, <2 x i1> %extract.i) #12 + // APPLE-NEXT: ret void + // X64-LABEL: test_mm_mask_store_sd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__P to <2 x double>* + // X64-NEXT: %1 = and i8 %__U, 1 + // X64-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %__A, <2 x double>* %0, i32 1, <2 x i1> %extract.i) #12 + // X64-NEXT: ret void _mm_mask_store_sd(__P, __U, __A); } __m128 test_mm_mask_load_ss(__m128 __A, __mmask8 __U, const float* __W) { - // CHECK-LABEL: @test_mm_mask_load_ss - // CHECK: 
call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // APPLE-LABEL: test_mm_mask_load_ss + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x float> %__A, <4 x float> , <4 x i32> + // APPLE-NEXT: %0 = bitcast float* %__W to <4 x float>* + // APPLE-NEXT: %1 = and i8 %__U, 1 + // APPLE-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // APPLE-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %3 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> %shuffle.i) #12 + // APPLE-NEXT: ret <4 x float> %3 + // X64-LABEL: test_mm_mask_load_ss + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x float> %__A, <4 x float> , <4 x i32> + // X64-NEXT: %0 = bitcast float* %__W to <4 x float>* + // X64-NEXT: %1 = and i8 %__U, 1 + // X64-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> %shuffle.i) #12 + // X64-NEXT: ret <4 x float> %3 return _mm_mask_load_ss(__A, __U, __W); } __m128 test_mm_maskz_load_ss (__mmask8 __U, const float * __W) { - // CHECK-LABEL: @test_mm_maskz_load_ss - // CHECK: call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // APPLE-LABEL: test_mm_maskz_load_ss + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast float* %__W to <4 x float>* + // APPLE-NEXT: %1 = and i8 %__U, 1 + // APPLE-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // APPLE-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // APPLE-NEXT: %3 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> zeroinitializer) #12 + // APPLE-NEXT: ret <4 x float> %3 + // X64-LABEL: test_mm_maskz_load_ss + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__W to <4 x float>* + // X64-NEXT: %1 = and i8 %__U, 1 + // X64-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> zeroinitializer) #12 + // X64-NEXT: ret <4 x float> %3 return _mm_maskz_load_ss (__U, __W); } __m128d test_mm_mask_load_sd (__m128d __A, __mmask8 __U, const double * __W) { - // CHECK-LABEL: @test_mm_mask_load_sd - // CHECK: call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // APPLE-LABEL: test_mm_mask_load_sd + // APPLE: entry: + // APPLE-NEXT: %shuffle3.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1 + // APPLE-NEXT: %0 = bitcast double* %__W to <2 x double>* + // APPLE-NEXT: %1 = and i8 %__U, 1 + // APPLE-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // APPLE-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // APPLE-NEXT: %3 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> %shuffle3.i) #12 + // APPLE-NEXT: ret <2 x double> %3 + // X64-LABEL: test_mm_mask_load_sd + // X64: entry: + // X64-NEXT: %shuffle3.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1 + // X64-NEXT: %0 = bitcast double* %__W to <2 x double>* + // X64-NEXT: %1 = and i8 %__U, 1 + // X64-NEXT: %2 = bitcast i8 %1 to <8 x 
i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> %shuffle3.i) #12 + // X64-NEXT: ret <2 x double> %3 return _mm_mask_load_sd (__A, __U, __W); } __m128d test_mm_maskz_load_sd (__mmask8 __U, const double * __W) { - // CHECK-LABEL: @test_mm_maskz_load_sd - // CHECK: call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // APPLE-LABEL: test_mm_maskz_load_sd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast double* %__W to <2 x double>* + // APPLE-NEXT: %1 = and i8 %__U, 1 + // APPLE-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // APPLE-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // APPLE-NEXT: %3 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> zeroinitializer) #12 + // APPLE-NEXT: ret <2 x double> %3 + // X64-LABEL: test_mm_maskz_load_sd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__W to <2 x double>* + // X64-NEXT: %1 = and i8 %__U, 1 + // X64-NEXT: %2 = bitcast i8 %1 to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> zeroinitializer) #12 + // X64-NEXT: ret <2 x double> %3 return _mm_maskz_load_sd (__U, __W); } __m512d test_mm512_abs_pd(__m512d a){ - // CHECK-LABEL: @test_mm512_abs_pd - // CHECK: and <8 x i64> + // APPLE-LABEL: test_mm512_abs_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x double> %a to <8 x i64> + // APPLE-NEXT: %and.i.i = and <8 x i64> %0, + // APPLE-NEXT: %1 = bitcast <8 x i64> %and.i.i to <8 x double> + // APPLE-NEXT: ret <8 x double> %1 + // X64-LABEL: test_mm512_abs_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x double> %a to <8 x i64> + // X64-NEXT: %and.i.i = and <8 x i64> %0, + // X64-NEXT: %1 = bitcast <8 x i64> %and.i.i to <8 x double> + // X64-NEXT: ret <8 x double> %1 return _mm512_abs_pd(a); } __m512d test_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A){ - // CHECK-LABEL: @test_mm512_mask_abs_pd - // CHECK: %[[AND_RES:.*]] = and <8 x i64> - // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x i64> %{{.*}} + // APPLE-LABEL: test_mm512_mask_abs_pd + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <8 x double> %__A to <8 x i64> + // APPLE-NEXT: %and.i.i.i = and <8 x i64> %0, + // APPLE-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // APPLE-NEXT: %2 = bitcast <8 x i64> %and.i.i.i to <8 x double> + // APPLE-NEXT: %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %__W + // APPLE-NEXT: ret <8 x double> %3 + // X64-LABEL: test_mm512_mask_abs_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <8 x double> %__A to <8 x i64> + // X64-NEXT: %and.i.i.i = and <8 x i64> %0, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = bitcast <8 x i64> %and.i.i.i to <8 x double> + // X64-NEXT: %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %__W + // X64-NEXT: ret <8 x double> %3 return _mm512_mask_abs_pd (__W,__U,__A); } __m512 test_mm512_abs_ps(__m512 a){ - // CHECK-LABEL: @test_mm512_abs_ps - // CHECK: and <16 x i32> + // APPLE-LABEL: test_mm512_abs_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <16 x float> %a to <8 x i64> + // APPLE-NEXT: %and1.i.i = and <8 
x i64> %0, + // APPLE-NEXT: %1 = bitcast <8 x i64> %and1.i.i to <16 x float> + // APPLE-NEXT: ret <16 x float> %1 + // X64-LABEL: test_mm512_abs_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <16 x float> %a to <8 x i64> + // X64-NEXT: %and1.i.i = and <8 x i64> %0, + // X64-NEXT: %1 = bitcast <8 x i64> %and1.i.i to <16 x float> + // X64-NEXT: ret <16 x float> %1 return _mm512_abs_ps(a); } __m512 test_mm512_mask_abs_ps(__m512 __W, __mmask16 __U, __m512 __A){ - // CHECK-LABEL: @test_mm512_mask_abs_ps - // CHECK: and <16 x i32> - // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + // APPLE-LABEL: test_mm512_mask_abs_ps + // APPLE: entry: + // APPLE-NEXT: %0 = bitcast <16 x float> %__A to <16 x i32> + // APPLE-NEXT: %1 = and <16 x i32> %0, + // APPLE-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // APPLE-NEXT: %3 = bitcast <16 x i32> %1 to <16 x float> + // APPLE-NEXT: %4 = select <16 x i1> %2, <16 x float> %3, <16 x float> %__W + // APPLE-NEXT: ret <16 x float> %4 + // X64-LABEL: test_mm512_mask_abs_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <16 x float> %__A to <16 x i32> + // X64-NEXT: %1 = and <16 x i32> %0, + // X64-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // X64-NEXT: %3 = bitcast <16 x i32> %1 to <16 x float> + // X64-NEXT: %4 = select <16 x i1> %2, <16 x float> %3, <16 x float> %__W + // X64-NEXT: ret <16 x float> %4 return _mm512_mask_abs_ps( __W, __U, __A); } __m512d test_mm512_zextpd128_pd512(__m128d A) { - // CHECK-LABEL: test_mm512_zextpd128_pd512 - // CHECK: store <2 x double> zeroinitializer - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_zextpd128_pd512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <2 x double> %A, <2 x double> zeroinitializer, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_zextpd128_pd512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <2 x double> %A, <2 x double> zeroinitializer, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_zextpd128_pd512(A); } __m512d test_mm512_zextpd256_pd512(__m256d A) { - // CHECK-LABEL: test_mm512_zextpd256_pd512 - // CHECK: store <4 x double> zeroinitializer - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_zextpd256_pd512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x double> %A, <4 x double> zeroinitializer, <8 x i32> + // APPLE-NEXT: ret <8 x double> %shuffle.i + // X64-LABEL: test_mm512_zextpd256_pd512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x double> %A, <4 x double> zeroinitializer, <8 x i32> + // X64-NEXT: ret <8 x double> %shuffle.i return _mm512_zextpd256_pd512(A); } __m512 test_mm512_zextps128_ps512(__m128 A) { - // CHECK-LABEL: test_mm512_zextps128_ps512 - // CHECK: store <4 x float> zeroinitializer - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_zextps128_ps512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x float> %A, <4 x float> zeroinitializer, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_zextps128_ps512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x float> %A, <4 x float> zeroinitializer, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_zextps128_ps512(A); } __m512 test_mm512_zextps256_ps512(__m256 A) { - // CHECK-LABEL: 
test_mm512_zextps256_ps512 - // CHECK: store <8 x float> zeroinitializer - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> + // APPLE-LABEL: test_mm512_zextps256_ps512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <8 x float> %A, <8 x float> zeroinitializer, <16 x i32> + // APPLE-NEXT: ret <16 x float> %shuffle.i + // X64-LABEL: test_mm512_zextps256_ps512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <8 x float> %A, <8 x float> zeroinitializer, <16 x i32> + // X64-NEXT: ret <16 x float> %shuffle.i return _mm512_zextps256_ps512(A); } __m512i test_mm512_zextsi128_si512(__m128i A) { - // CHECK-LABEL: test_mm512_zextsi128_si512 - // CHECK: store <2 x i64> zeroinitializer - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_zextsi128_si512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <2 x i64> %A, <2 x i64> zeroinitializer, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %shuffle.i + // X64-LABEL: test_mm512_zextsi128_si512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <2 x i64> %A, <2 x i64> zeroinitializer, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_zextsi128_si512(A); } __m512i test_mm512_zextsi256_si512(__m256i A) { - // CHECK-LABEL: test_mm512_zextsi256_si512 - // CHECK: store <4 x i64> zeroinitializer - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + // APPLE-LABEL: test_mm512_zextsi256_si512 + // APPLE: entry: + // APPLE-NEXT: %shuffle.i = shufflevector <4 x i64> %A, <4 x i64> zeroinitializer, <8 x i32> + // APPLE-NEXT: ret <8 x i64> %shuffle.i + // X64-LABEL: test_mm512_zextsi256_si512 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x i64> %A, <4 x i64> zeroinitializer, <8 x i32> + // X64-NEXT: ret <8 x i64> %shuffle.i return _mm512_zextsi256_si512(A); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -1,9876 +1,16186 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s - +// RUN: %clang_cc1 -O1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=X64 +// RUN: %clang_cc1 -O1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=X64 #include __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epu32_mask - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} - // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> + // X64-LABEL: test_mm_cmpeq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp eq <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpeq_epu32_mask(__a, __b); } __mmask8 test_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epu32_mask - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> 
%{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpeq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp eq <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpeq_epu32_mask(__u, __a, __b); } __mmask8 test_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epu64_mask - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} - // CHECK: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> + // X64-LABEL: test_mm_cmpeq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpeq_epu64_mask(__a, __b); } __mmask8 test_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epu64_mask - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpeq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpeq_epu64_mask(__u, __a, __b); } __mmask8 test_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epi32_mask - // CHECK: icmp sge <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpge_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp sge <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpge_epi32_mask(__a, __b); } __mmask8 test_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epi32_mask - // CHECK: icmp sge <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpge_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp sge <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpge_epi32_mask(__u, __a, __b); } __mmask8 test_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epi64_mask - // CHECK: icmp sge <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpge_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sge <2 x i64> %__a, %__b 
+ // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpge_epi64_mask(__a, __b); } __mmask8 test_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epi64_mask - // CHECK: icmp sge <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpge_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sge <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpge_epi64_mask(__u, __a, __b); } __mmask8 test_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epi32_mask - // CHECK: icmp sge <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpge_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp sge <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmpge_epi32_mask(__a, __b); } __mmask8 test_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epi32_mask - // CHECK: icmp sge <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpge_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp sge <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmpge_epi32_mask(__u, __a, __b); } __mmask8 test_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epi64_mask - // CHECK: icmp sge <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpge_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sge <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmpge_epi64_mask(__a, __b); } __mmask8 test_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epi64_mask - // CHECK: icmp sge <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpge_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sge <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmpge_epi64_mask(__u, __a, __b); } __mmask8 test_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epu32_mask - // CHECK: icmp uge <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpge_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast 
<2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp uge <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpge_epu32_mask(__a, __b); } __mmask8 test_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epu32_mask - // CHECK: icmp uge <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpge_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp uge <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpge_epu32_mask(__u, __a, __b); } __mmask8 test_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epu64_mask - // CHECK: icmp uge <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpge_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp uge <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpge_epu64_mask(__a, __b); } __mmask8 test_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epu64_mask - // CHECK: icmp uge <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpge_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp uge <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpge_epu64_mask(__u, __a, __b); } __mmask8 test_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epu32_mask - // CHECK: icmp uge <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpge_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp uge <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmpge_epu32_mask(__a, __b); } __mmask8 test_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epu32_mask - // CHECK: icmp uge <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpge_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp uge <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmpge_epu32_mask(__u, __a, __b); } __mmask8 
test_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epu64_mask - // CHECK: icmp uge <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpge_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp uge <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmpge_epu64_mask(__a, __b); } __mmask8 test_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epu64_mask - // CHECK: icmp uge <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpge_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp uge <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmpge_epu64_mask(__u, __a, __b); } __mmask8 test_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epu32_mask - // CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpgt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ugt <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpgt_epu32_mask(__a, __b); } __mmask8 test_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epu32_mask - // CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpgt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ugt <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpgt_epu32_mask(__u, __a, __b); } __mmask8 test_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epu64_mask - // CHECK: icmp ugt <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpgt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ugt <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpgt_epu64_mask(__a, __b); } __mmask8 test_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epu64_mask - // CHECK: icmp ugt <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpgt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ugt <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = 
and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpgt_epu64_mask(__u, __a, __b); } __mmask8 test_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epu32_mask - // CHECK: icmp ugt <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpgt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ugt <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmpgt_epu32_mask(__a, __b); } __mmask8 test_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epu32_mask - // CHECK: icmp ugt <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpgt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ugt <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmpgt_epu32_mask(__u, __a, __b); } __mmask8 test_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epu64_mask - // CHECK: icmp ugt <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpgt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ugt <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmpgt_epu64_mask(__a, __b); } __mmask8 test_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epu64_mask - // CHECK: icmp ugt <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpgt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ugt <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmpgt_epu64_mask(__u, __a, __b); } __mmask8 test_mm_cmple_epi32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epi32_mask - // CHECK: icmp sle <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmple_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp sle <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmple_epi32_mask(__a, __b); } __mmask8 test_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epi32_mask - // CHECK: icmp sle <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmple_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // 
X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp sle <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmple_epi32_mask(__u, __a, __b); } __mmask8 test_mm_cmple_epi64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epi64_mask - // CHECK: icmp sle <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmple_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sle <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmple_epi64_mask(__a, __b); } __mmask8 test_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epi64_mask - // CHECK: icmp sle <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmple_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sle <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmple_epi64_mask(__u, __a, __b); } __mmask8 test_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epi32_mask - // CHECK: icmp sle <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmple_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp sle <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmple_epi32_mask(__a, __b); } __mmask8 test_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epi32_mask - // CHECK: icmp sle <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmple_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp sle <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmple_epi32_mask(__u, __a, __b); } __mmask8 test_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epi64_mask - // CHECK: icmp sle <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmple_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sle <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmple_epi64_mask(__a, __b); } __mmask8 test_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epi64_mask - // CHECK: icmp sle <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} 
+ // X64-LABEL: test_mm256_mask_cmple_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sle <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmple_epi64_mask(__u, __a, __b); } __mmask8 test_mm_cmple_epu32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epu32_mask - // CHECK: icmp ule <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmple_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ule <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmple_epu32_mask(__a, __b); } __mmask8 test_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epu32_mask - // CHECK: icmp ule <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmple_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ule <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmple_epu32_mask(__u, __a, __b); } __mmask8 test_mm_cmple_epu64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epu64_mask - // CHECK: icmp ule <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmple_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ule <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmple_epu64_mask(__a, __b); } __mmask8 test_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epu64_mask - // CHECK: icmp ule <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmple_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ule <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmple_epu64_mask(__u, __a, __b); } __mmask8 test_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epu32_mask - // CHECK: icmp ule <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmple_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ule <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 
return (__mmask8)_mm256_cmple_epu32_mask(__a, __b); } __mmask8 test_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epu32_mask - // CHECK: icmp ule <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmple_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ule <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmple_epu32_mask(__u, __a, __b); } __mmask8 test_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epu64_mask - // CHECK: icmp ule <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmple_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ule <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmple_epu64_mask(__a, __b); } __mmask8 test_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epu64_mask - // CHECK: icmp ule <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmple_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ule <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmple_epu64_mask(__u, __a, __b); } __mmask8 test_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epi32_mask - // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmplt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp slt <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmplt_epi32_mask(__a, __b); } __mmask8 test_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epi32_mask - // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmplt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp slt <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmplt_epi32_mask(__u, __a, __b); } __mmask8 test_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epi64_mask - // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmplt_epi64_mask + // X64: entry: 
+ // X64-NEXT: %0 = icmp slt <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmplt_epi64_mask(__a, __b); } __mmask8 test_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epi64_mask - // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmplt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp slt <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmplt_epi64_mask(__u, __a, __b); } __mmask8 test_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epi32_mask - // CHECK: icmp slt <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmplt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp slt <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmplt_epi32_mask(__a, __b); } __mmask8 test_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epi32_mask - // CHECK: icmp slt <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmplt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp slt <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmplt_epi32_mask(__u, __a, __b); } __mmask8 test_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epi64_mask - // CHECK: icmp slt <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmplt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp slt <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmplt_epi64_mask(__a, __b); } __mmask8 test_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epi64_mask - // CHECK: icmp slt <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmplt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp slt <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmplt_epi64_mask(__u, __a, __b); } __mmask8 test_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epu32_mask - // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: 
test_mm_cmplt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ult <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmplt_epu32_mask(__a, __b); } __mmask8 test_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epu32_mask - // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmplt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ult <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmplt_epu32_mask(__u, __a, __b); } __mmask8 test_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epu64_mask - // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmplt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ult <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmplt_epu64_mask(__a, __b); } __mmask8 test_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epu64_mask - // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmplt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ult <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmplt_epu64_mask(__u, __a, __b); } __mmask8 test_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epu32_mask - // CHECK: icmp ult <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmplt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ult <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmplt_epu32_mask(__a, __b); } __mmask8 test_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epu32_mask - // CHECK: icmp ult <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmplt_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ult <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return 
(__mmask8)_mm256_mask_cmplt_epu32_mask(__u, __a, __b); } __mmask8 test_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epu64_mask - // CHECK: icmp ult <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmplt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ult <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmplt_epu64_mask(__a, __b); } __mmask8 test_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epu64_mask - // CHECK: icmp ult <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmplt_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ult <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmplt_epu64_mask(__u, __a, __b); } __mmask8 test_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epi32_mask - // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpneq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ne <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpneq_epi32_mask(__a, __b); } __mmask8 test_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epi32_mask - // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpneq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ne <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpneq_epi32_mask(__u, __a, __b); } __mmask8 test_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epi64_mask - // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpneq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpneq_epi64_mask(__a, __b); } __mmask8 test_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epi64_mask - // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpneq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = 
shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpneq_epi64_mask(__u, __a, __b); } __mmask8 test_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epi32_mask - // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpneq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ne <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmpneq_epi32_mask(__a, __b); } __mmask8 test_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpneq_epi32_mask - // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpneq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ne <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmpneq_epi32_mask(__u, __a, __b); } __mmask8 test_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epi64_mask - // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpneq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmpneq_epi64_mask(__a, __b); } __mmask8 test_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpneq_epi64_mask - // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpneq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmpneq_epi64_mask(__u, __a, __b); } __mmask8 test_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epu32_mask - // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpneq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ne <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpneq_epu32_mask(__a, __b); } __mmask8 test_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epu32_mask - // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: 
test_mm_mask_cmpneq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp ne <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpneq_epu32_mask(__u, __a, __b); } __mmask8 test_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epu64_mask - // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpneq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpneq_epu64_mask(__a, __b); } __mmask8 test_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epu64_mask - // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpneq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpneq_epu64_mask(__u, __a, __b); } __mmask8 test_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epu32_mask - // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpneq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ne <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmpneq_epu32_mask(__a, __b); } __mmask8 test_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpneq_epu32_mask - // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpneq_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp ne <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmpneq_epu32_mask(__u, __a, __b); } __mmask8 test_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epu64_mask - // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpneq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmpneq_epu64_mask(__a, __b); } __mmask8 test_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: 
@test_mm256_mask_cmpneq_epu64_mask - // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpneq_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp ne <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmpneq_epu64_mask(__u, __a, __b); } __mmask8 test_mm_cmp_eq_epi32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_eq_epi32_mask - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_eq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp eq <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmp_epi32_mask(__a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm_mask_cmp_lt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_lt_epi32_mask - // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmp_lt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp slt <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmp_epi32_mask(__u, __a, __b, _MM_CMPINT_LT); } __mmask8 test_mm_cmp_lt_epi64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_lt_epi64_mask - // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_lt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp slt <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmp_epi64_mask(__a, __b, _MM_CMPINT_LT); } __mmask8 test_mm_mask_cmp_eq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_eq_epi64_mask - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmp_eq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmp_epi64_mask(__u, __a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm256_cmp_eq_epi32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_eq_epi32_mask - // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_eq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to 
<8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp eq <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmp_epi32_mask(__a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm256_mask_cmp_le_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmp_le_epi32_mask - // CHECK: icmp sle <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmp_le_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp sle <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmp_epi32_mask(__u, __a, __b, _MM_CMPINT_LE); } __mmask8 test_mm256_cmp_eq_epi64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_eq_epi64_mask - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_eq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmp_epi64_mask(__a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm256_mask_cmp_eq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmp_eq_epi64_mask - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmp_eq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmp_epi64_mask(__u, __a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm_cmp_epu32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_epu32_mask - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp eq <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmp_epu32_mask(__a, __b, 0); } __mmask8 test_mm_mask_cmp_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_epu32_mask - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmp_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp eq <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmp_epu32_mask(__u, 
__a, __b, 0); } __mmask8 test_mm_cmp_epu64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_epu64_mask - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmp_epu64_mask(__a, __b, 0); } __mmask8 test_mm_mask_cmp_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_epu64_mask - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmp_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmp_epu64_mask(__u, __a, __b, 0); } __mmask8 test_mm256_cmp_epu32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_epu32_mask - // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp eq <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmp_epu32_mask(__a, __b, 0); } __mmask8 test_mm256_mask_cmp_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmp_epu32_mask - // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmp_epu32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp eq <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmp_epu32_mask(__u, __a, __b, 0); } __mmask8 test_mm256_cmp_epu64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_epu64_mask - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmp_epu64_mask(__a, __b, 0); } __mmask8 test_mm256_mask_cmp_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmp_epu64_mask - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmp_epu64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmp_epu64_mask(__u, __a, __b, 0); } 
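+// NOTE: Editorial summary of the masked integer-compare lowering exercised by
+// the tests above. This is a hand-written sketch of the IR shape shown in the
+// X64 check lines, not copied from any single test; %cmp, %mask, %lo, %pad and
+// <pred>/<N x iM> are illustrative placeholders, not names the compiler emits.
+//   %cmp  = icmp <pred> <N x iM> %a, %b                          ; vector compare
+//   %mask = bitcast i8 %__u to <8 x i1>                          ; widen the __mmask8
+//   %lo   = shufflevector <8 x i1> %mask, <8 x i1> undef, ...    ; keep the low N lanes
+//   %and  = and <N x i1> %cmp, %lo                               ; apply the write-mask
+//   %pad  = shufflevector <N x i1> %and, <N x i1> zeroinitializer, ...  ; pad to 8 lanes
+//   %res  = bitcast <8 x i1> %pad to i8                          ; final __mmask8 result
+// Unmasked forms skip the bitcast/and of %__u; full 8-lane (<8 x i32>) compares
+// also need no padding shuffle and bitcast the <8 x i1> compare result directly.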
__m256i test_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_add_epi32 - //CHECK: add <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_add_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %add.i.i = add <8 x i32> %1, %0 + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %add.i.i, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_add_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_add_epi32 - //CHECK: add <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_add_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %add.i.i = add <8 x i32> %1, %0 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %add.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_add_epi32(__U, __A, __B); } __m256i test_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_add_epi64 - //CHECK: add <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_add_epi64 + // X64: entry: + // X64-NEXT: %add.i.i = add <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %add.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_add_epi64(__W,__U,__A,__B); } __m256i test_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_add_epi64 - //CHECK: add <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_add_epi64 + // X64: entry: + // X64-NEXT: %add.i.i = add <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %add.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_add_epi64 (__U,__A,__B); } __m256i test_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_sub_epi32 - //CHECK: sub <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_sub_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %sub.i.i = sub <8 x i32> %0, %1 + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %sub.i.i, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return 
_mm256_mask_sub_epi32 (__W,__U,__A,__B); } __m256i test_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_sub_epi32 - //CHECK: sub <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_sub_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %sub.i.i = sub <8 x i32> %0, %1 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %sub.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_sub_epi32 (__U,__A,__B); } __m256i test_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_sub_epi64 - //CHECK: sub <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_sub_epi64 + // X64: entry: + // X64-NEXT: %sub.i.i = sub <4 x i64> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %sub.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_sub_epi64 (__W,__U,__A,__B); } __m256i test_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_sub_epi64 - //CHECK: sub <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_sub_epi64 + // X64: entry: + // X64-NEXT: %sub.i.i = sub <4 x i64> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %sub.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_sub_epi64 (__U,__A,__B); } __m128i test_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_add_epi32 - //CHECK: add <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_add_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %add.i.i = add <4 x i32> %1, %0 + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %add.i.i, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_add_epi32(__W,__U,__A,__B); } __m128i test_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_add_epi32 - //CHECK: add <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_add_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %add.i.i = add <4 x i32> %1, %0 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 
x i32> %add.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_add_epi32 (__U,__A,__B); } __m128i test_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_add_epi64 - //CHECK: add <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_add_epi64 + // X64: entry: + // X64-NEXT: %add.i.i = add <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %add.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_add_epi64 (__W,__U,__A,__B); } __m128i test_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_add_epi64 - //CHECK: add <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_add_epi64 + // X64: entry: + // X64-NEXT: %add.i.i = add <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %add.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_add_epi64 (__U,__A,__B); } __m128i test_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_sub_epi32 - //CHECK: sub <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_sub_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %sub.i.i = sub <4 x i32> %0, %1 + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %sub.i.i, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_sub_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_sub_epi32 - //CHECK: sub <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_sub_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %sub.i.i = sub <4 x i32> %0, %1 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %sub.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_sub_epi32(__U, __A, __B); } __m128i test_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_sub_epi64 - //CHECK: sub <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_sub_epi64 + // X64: entry: + // X64-NEXT: %sub.i.i = sub <2 x i64> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector 
<8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %sub.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_sub_epi64 (__W, __U, __A, __B); } __m128i test_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_sub_epi64 - //CHECK: sub <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_sub_epi64 + // X64: entry: + // X64-NEXT: %sub.i.i = sub <2 x i64> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %sub.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_sub_epi64 (__U, __A, __B); } __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - //CHECK-LABEL: @test_mm256_mask_mul_epi32 - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_mul_epi32 + // X64: entry: + // X64-NEXT: %0 = shl <4 x i64> %__X, + // X64-NEXT: %1 = ashr exact <4 x i64> %0, + // X64-NEXT: %2 = shl <4 x i64> %__Y, + // X64-NEXT: %3 = ashr exact <4 x i64> %2, + // X64-NEXT: %4 = mul nsw <4 x i64> %3, %1 + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <4 x i32> + // X64-NEXT: %6 = select <4 x i1> %extract.i, <4 x i64> %4, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_mul_epi32(__W, __M, __X, __Y); } __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { - //CHECK-LABEL: @test_mm256_maskz_mul_epi32 - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_mul_epi32 + // X64: entry: + // X64-NEXT: %0 = shl <4 x i64> %__X, + // X64-NEXT: %1 = ashr exact <4 x i64> %0, + // X64-NEXT: %2 = shl <4 x i64> %__Y, + // X64-NEXT: %3 = ashr exact <4 x i64> %2, + // X64-NEXT: %4 = mul nsw <4 x i64> %3, %1 + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <4 x i32> + // X64-NEXT: %6 = select <4 x i1> %extract.i, <4 x i64> %4, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_mul_epi32(__M, __X, __Y); } __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { - //CHECK-LABEL: @test_mm_mask_mul_epi32 - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_mul_epi32 + // X64: entry: + // X64-NEXT: %0 = shl <2 x i64> %__X, + // X64-NEXT: %1 = ashr exact <2 x i64> %0, + // X64-NEXT: %2 = shl <2 x i64> %__Y, + // X64-NEXT: %3 = ashr exact <2 x i64> %2, + // X64-NEXT: %4 = mul nsw <2 x i64> %3, %1 + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <2 x i32> + // X64-NEXT: %6 = select <2 
x i1> %extract.i, <2 x i64> %4, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_mul_epi32(__W, __M, __X, __Y); } __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { - //CHECK-LABEL: @test_mm_maskz_mul_epi32 - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_mul_epi32 + // X64: entry: + // X64-NEXT: %0 = shl <2 x i64> %__X, + // X64-NEXT: %1 = ashr exact <2 x i64> %0, + // X64-NEXT: %2 = shl <2 x i64> %__Y, + // X64-NEXT: %3 = ashr exact <2 x i64> %2, + // X64-NEXT: %4 = mul nsw <2 x i64> %3, %1 + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <2 x i32> + // X64-NEXT: %6 = select <2 x i1> %extract.i, <2 x i64> %4, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_mul_epi32(__M, __X, __Y); } __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - //CHECK-LABEL: @test_mm256_mask_mul_epu32 - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_mul_epu32 + // X64: entry: + // X64-NEXT: %0 = and <4 x i64> %__X, + // X64-NEXT: %1 = and <4 x i64> %__Y, + // X64-NEXT: %2 = mul nuw <4 x i64> %1, %0 + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i64> %2, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_mul_epu32(__W, __M, __X, __Y); } __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { - //CHECK-LABEL: @test_mm256_maskz_mul_epu32 - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_mul_epu32 + // X64: entry: + // X64-NEXT: %0 = and <4 x i64> %__X, + // X64-NEXT: %1 = and <4 x i64> %__Y, + // X64-NEXT: %2 = mul nuw <4 x i64> %1, %0 + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i64> %2, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_mul_epu32(__M, __X, __Y); } __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { - //CHECK-LABEL: @test_mm_mask_mul_epu32 - //CHECK: and <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, - //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_mul_epu32 + // X64: entry: + // X64-NEXT: %0 = and <2 x i64> %__X, + // X64-NEXT: %1 = and <2 x i64> %__Y, + // X64-NEXT: %2 = mul nuw <2 x i64> %1, %0 + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> + // X64-NEXT: %4 = select <2 x i1> %extract.i, <2 x i64> %2, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_mul_epu32(__W, __M, __X, __Y); } __m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) { - //CHECK-LABEL: @test_mm_maskz_mul_epu32 - //CHECK: and 
<2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, - //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_mul_epu32 + // X64: entry: + // X64-NEXT: %0 = and <2 x i64> %__X, + // X64-NEXT: %1 = and <2 x i64> %__Y, + // X64-NEXT: %2 = mul nuw <2 x i64> %1, %0 + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> + // X64-NEXT: %4 = select <2 x i1> %extract.i, <2 x i64> %2, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_mul_epu32(__M, __X, __Y); } __m128i test_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_mullo_epi32 - //CHECK: mul <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_mullo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %mul.i.i = mul <4 x i32> %1, %0 + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %mul.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_mullo_epi32(__M, __A, __B); } __m128i test_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_mullo_epi32 - //CHECK: mul <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_mullo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %mul.i.i = mul <4 x i32> %1, %0 + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %mul.i.i, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_mullo_epi32(__W, __M, __A, __B); } __m256i test_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_mullo_epi32 - //CHECK: mul <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_mullo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %mul.i.i = mul <8 x i32> %1, %0 + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %mul.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_mullo_epi32(__M, __A, __B); } __m256i test_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_mullo_epi32 - //CHECK: mul <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_mullo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %mul.i.i = mul <8 x i32> %1, %0 + // X64-NEXT: %2 = bitcast 
<4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %mul.i.i, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_mullo_epi32(__W, __M, __A, __B); } __m256i test_mm256_and_epi32 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_and_epi32 - //CHECK: and <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i = and <4 x i64> %__B, %__A + // X64-NEXT: ret <4 x i64> %and1.i return _mm256_and_epi32(__A, __B); } __m256i test_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_and_epi32 - //CHECK: and <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %and1.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_and_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_and_epi32 - //CHECK: and <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %and1.i.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_and_epi32(__U, __A, __B); } __m128i test_mm_and_epi32 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_and_epi32 - //CHECK: and <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i = and <2 x i64> %__B, %__A + // X64-NEXT: ret <2 x i64> %and1.i return _mm_and_epi32(__A, __B); } __m128i test_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_and_epi32 - //CHECK: and <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %and1.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_and_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_and_epi32 - //CHECK: and <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_and_epi32 + // X64: entry: + // X64-NEXT: %and1.i.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %and1.i.i.i to <4 x i32> + // X64-NEXT: %1 = 
bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_and_epi32(__U, __A, __B); } __m256i test_mm256_andnot_epi32 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, - //CHECK: and <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <4 x i64> %__A, + // X64-NEXT: %1 = and <4 x i64> %0, %__B + // X64-NEXT: ret <4 x i64> %1 return _mm256_andnot_epi32(__A, __B); } __m256i test_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, - //CHECK: and <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <4 x i64> %__A, + // X64-NEXT: %1 = and <4 x i64> %0, %__B + // X64-NEXT: %2 = bitcast <4 x i64> %1 to <8 x i32> + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_andnot_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, - //CHECK: and <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <4 x i64> %__A, + // X64-NEXT: %1 = and <4 x i64> %0, %__B + // X64-NEXT: %2 = bitcast <4 x i64> %1 to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_andnot_epi32(__U, __A, __B); } __m128i test_mm_andnot_epi32 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, - //CHECK: and <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <2 x i64> %__A, + // X64-NEXT: %1 = and <2 x i64> %0, %__B + // X64-NEXT: ret <2 x i64> %1 return _mm_andnot_epi32(__A, __B); } __m128i test_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, - //CHECK: and <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <2 x i64> %__A, + // X64-NEXT: %1 = and <2 x i64> %0, %__B + // X64-NEXT: %2 = bitcast <2 x i64> %1 to <4 x i32> + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_andnot_epi32(__W, __U, __A, __B); } __m128i 
test_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, - //CHECK: and <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_andnot_epi32 + // X64: entry: + // X64-NEXT: %0 = xor <2 x i64> %__A, + // X64-NEXT: %1 = and <2 x i64> %0, %__B + // X64-NEXT: %2 = bitcast <2 x i64> %1 to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_andnot_epi32(__U, __A, __B); } __m256i test_mm256_or_epi32 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_or_epi32 - //CHECK: or <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i = or <4 x i64> %__B, %__A + // X64-NEXT: ret <4 x i64> %or1.i return _mm256_or_epi32(__A, __B); } __m256i test_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_or_epi32 - //CHECK: or <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i.i = or <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %or1.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_or_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_or_epi32 - //CHECK: or <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_maskz_or_epi32(__U, __A, __B); + // X64-LABEL: test_mm256_maskz_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i.i.i = or <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %or1.i.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 + return _mm256_maskz_or_epi32(__U, __A, __B); } __m128i test_mm_or_epi32 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_or_epi32 - //CHECK: or <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i = or <2 x i64> %__B, %__A + // X64-NEXT: ret <2 x i64> %or1.i return _mm_or_epi32(__A, __B); } __m128i test_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_or_epi32 - //CHECK: or <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i.i = or <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %or1.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast 
<4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_or_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_or_epi32 - //CHECK: or <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_or_epi32 + // X64: entry: + // X64-NEXT: %or1.i.i.i = or <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %or1.i.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_or_epi32(__U, __A, __B); } __m256i test_mm256_xor_epi32 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_xor_epi32 - //CHECK: or <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i = xor <4 x i64> %__B, %__A + // X64-NEXT: ret <4 x i64> %xor1.i return _mm256_xor_epi32(__A, __B); } __m256i test_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_xor_epi32 - //CHECK: xor <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i.i = xor <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %xor1.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_xor_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_xor_epi32 - //CHECK: xor <8 x i32> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i.i.i = xor <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %xor1.i.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_xor_epi32(__U, __A, __B); } __m128i test_mm_xor_epi32 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_xor_epi32 - //CHECK: xor <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i = xor <2 x i64> %__B, %__A + // X64-NEXT: ret <2 x i64> %xor1.i return _mm_xor_epi32(__A, __B); } __m128i test_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_xor_epi32 - //CHECK: xor <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i.i = xor <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %xor1.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> 
%1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_xor_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_xor_epi32 - //CHECK: xor <4 x i32> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_xor_epi32 + // X64: entry: + // X64-NEXT: %xor1.i.i.i = xor <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %xor1.i.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_xor_epi32(__U, __A, __B); } __m256i test_mm256_and_epi64 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_and_epi64 - //CHECK: and <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_and_epi64 + // X64: entry: + // X64-NEXT: %and.i = and <4 x i64> %__B, %__A + // X64-NEXT: ret <4 x i64> %and.i return _mm256_and_epi64(__A, __B); } __m256i test_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_and_epi64 - //CHECK: and <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_and_epi64 + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %and.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_and_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_and_epi64 - //CHECK: and <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_and_epi64 + // X64: entry: + // X64-NEXT: %and.i.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i.i, <4 x i64> %and.i.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_and_epi64(__U, __A, __B); } __m128i test_mm_and_epi64 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_and_epi64 - //CHECK: and <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_and_epi64 + // X64: entry: + // X64-NEXT: %and.i = and <2 x i64> %__B, %__A + // X64-NEXT: ret <2 x i64> %and.i return _mm_and_epi64(__A, __B); } __m128i test_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_and_epi64 - //CHECK: and <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_and_epi64 + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %and.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_and_epi64(__W,__U, __A, __B); } __m128i test_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { - 
//CHECK-LABEL: @test_mm_maskz_and_epi64 - //CHECK: and <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_and_epi64 + // X64: entry: + // X64-NEXT: %and.i.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i.i, <2 x i64> %and.i.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_and_epi64(__U, __A, __B); } __m256i test_mm256_andnot_epi64 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i = xor <4 x i64> %__A, + // X64-NEXT: %and.i = and <4 x i64> %neg.i, %__B + // X64-NEXT: ret <4 x i64> %and.i return _mm256_andnot_epi64(__A, __B); } __m256i test_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i.i = xor <4 x i64> %__A, + // X64-NEXT: %and.i.i = and <4 x i64> %neg.i.i, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %and.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_andnot_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i.i.i = xor <4 x i64> %__A, + // X64-NEXT: %and.i.i.i = and <4 x i64> %neg.i.i.i, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i.i, <4 x i64> %and.i.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_andnot_epi64(__U, __A, __B); } __m128i test_mm_andnot_epi64 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i = xor <2 x i64> %__A, + // X64-NEXT: %and.i = and <2 x i64> %neg.i, %__B + // X64-NEXT: ret <2 x i64> %and.i return _mm_andnot_epi64(__A, __B); } __m128i test_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i.i = xor <2 x i64> %__A, + // X64-NEXT: %and.i.i = and <2 x i64> %neg.i.i, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %and.i.i, <2 x i64> %__W + // X64-NEXT: 
ret <2 x i64> %1 return _mm_mask_andnot_epi64(__W,__U, __A, __B); } __m128i test_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_andnot_epi64 + // X64: entry: + // X64-NEXT: %neg.i.i.i = xor <2 x i64> %__A, + // X64-NEXT: %and.i.i.i = and <2 x i64> %neg.i.i.i, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i.i, <2 x i64> %and.i.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_andnot_epi64(__U, __A, __B); } __m256i test_mm256_or_epi64 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_or_epi64 - //CHECK: or <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_or_epi64 + // X64: entry: + // X64-NEXT: %or.i = or <4 x i64> %__B, %__A + // X64-NEXT: ret <4 x i64> %or.i return _mm256_or_epi64(__A, __B); } __m256i test_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_or_epi64 - //CHECK: or <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_or_epi64 + // X64: entry: + // X64-NEXT: %or.i.i = or <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %or.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_or_epi64(__W,__U, __A, __B); } __m256i test_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_or_epi64 - //CHECK: or <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_or_epi64 + // X64: entry: + // X64-NEXT: %or.i.i.i = or <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i.i, <4 x i64> %or.i.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_or_epi64(__U, __A, __B); } __m128i test_mm_or_epi64 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_or_epi64 - //CHECK: or <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_or_epi64 + // X64: entry: + // X64-NEXT: %or.i = or <2 x i64> %__B, %__A + // X64-NEXT: ret <2 x i64> %or.i return _mm_or_epi64(__A, __B); } __m128i test_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_or_epi64 - //CHECK: or <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_or_epi64 + // X64: entry: + // X64-NEXT: %or.i.i = or <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %or.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_or_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_or_epi64 - //CHECK: or <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> 
%{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_or_epi64 + // X64: entry: + // X64-NEXT: %or.i.i.i = or <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i.i, <2 x i64> %or.i.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_or_epi64( __U, __A, __B); } __m256i test_mm256_xor_epi64 (__m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_xor_epi64 - //CHECK: xor <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i = xor <4 x i64> %__B, %__A + // X64-NEXT: ret <4 x i64> %xor.i return _mm256_xor_epi64(__A, __B); } __m256i test_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_xor_epi64 - //CHECK: xor <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i.i = xor <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %xor.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_xor_epi64(__W,__U, __A, __B); } __m256i test_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_xor_epi64 - //CHECK: xor <4 x i64> %{{.*}}, %{{.*}} - //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i.i.i = xor <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i.i, <4 x i64> %xor.i.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_xor_epi64(__U, __A, __B); } __m128i test_mm_xor_epi64 (__m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_xor_epi64 - //CHECK: xor <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i = xor <2 x i64> %__B, %__A + // X64-NEXT: ret <2 x i64> %xor.i return _mm_xor_epi64(__A, __B); } __m128i test_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_xor_epi64 - //CHECK: xor <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i.i = xor <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %xor.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_xor_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_xor_epi64 - //CHECK: xor <2 x i64> %{{.*}}, %{{.*}} - //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_xor_epi64 + // X64: entry: + // X64-NEXT: %xor.i.i.i = xor <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i.i, <2 x i64> 
%xor.i.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_xor_epi64( __U, __A, __B); } __mmask8 test_mm256_cmp_ps_mask_eq_oq(__m256 a, __m256 b) { - // CHECK-LABEL: @test_mm256_cmp_ps_mask_eq_oq - // CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm256_cmp_ps_mask_lt_os(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_lt_os - // CHECK: fcmp olt <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm256_cmp_ps_mask_le_os(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_le_os - // CHECK: fcmp ole <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm256_cmp_ps_mask_unord_q(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_unord_q - // CHECK: fcmp uno <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_cmp_ps_mask_neq_uq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_uq - // CHECK: fcmp une <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_cmp_ps_mask_nlt_us(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_nlt_us - // CHECK: fcmp uge <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm256_cmp_ps_mask_nle_us(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_nle_us - // CHECK: fcmp ugt <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm256_cmp_ps_mask_ord_q(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_ord_q - // CHECK: fcmp ord <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm256_cmp_ps_mask_eq_uq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_uq - // CHECK: fcmp ueq <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 
return _mm256_cmp_ps_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_cmp_ps_mask_nge_us(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_nge_us - // CHECK: fcmp ult <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm256_cmp_ps_mask_ngt_us(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_ngt_us - // CHECK: fcmp ule <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm256_cmp_ps_mask_false_oq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_false_oq - // CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm256_cmp_ps_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_cmp_ps_mask_neq_oq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_oq - // CHECK: fcmp one <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm256_cmp_ps_mask_ge_os(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_ge_os - // CHECK: fcmp oge <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm256_cmp_ps_mask_gt_os(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_gt_os - // CHECK: fcmp ogt <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm256_cmp_ps_mask_true_uq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_true_uq - // CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_true_uq + // X64: entry: + // X64-NEXT: ret i8 -1 return _mm256_cmp_ps_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_cmp_ps_mask_eq_os(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_os - // CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm256_cmp_ps_mask_lt_oq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_lt_oq - // CHECK: fcmp olt <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm256_cmp_ps_mask_le_oq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_le_oq - // CHECK: fcmp ole <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: 
test_mm256_cmp_ps_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm256_cmp_ps_mask_unord_s(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_unord_s - // CHECK: fcmp uno <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm256_cmp_ps_mask_neq_us(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_us - // CHECK: fcmp une <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm256_cmp_ps_mask_nlt_uq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_nlt_uq - // CHECK: fcmp uge <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_cmp_ps_mask_nle_uq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_nle_uq - // CHECK: fcmp ugt <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_cmp_ps_mask_ord_s(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_ord_s - // CHECK: fcmp ord <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm256_cmp_ps_mask_eq_us(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_us - // CHECK: fcmp ueq <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm256_cmp_ps_mask_nge_uq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_nge_uq - // CHECK: fcmp ult <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_cmp_ps_mask_ngt_uq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_ngt_uq - // CHECK: fcmp ule <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_cmp_ps_mask_false_os(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_false_os - // CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_false_os + // X64: entry: + // 
X64-NEXT: ret i8 0 return _mm256_cmp_ps_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_cmp_ps_mask_neq_os(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_os - // CHECK: fcmp one <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_cmp_ps_mask_ge_oq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_ge_oq - // CHECK: fcmp oge <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm256_cmp_ps_mask_gt_oq(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_gt_oq - // CHECK: fcmp ogt <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast <8 x i1> %0 to i8 + // X64-NEXT: ret i8 %1 return _mm256_cmp_ps_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm256_cmp_ps_mask_true_us(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_cmp_ps_mask_true_us - // CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_ps_mask_true_us + // X64: entry: + // X64-NEXT: ret i8 -1 return _mm256_cmp_ps_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_oq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask_eq_oq - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_lt_os(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_lt_os - // CHECK: [[CMP:%.*]] = fcmp olt <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_le_os(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_le_os - // CHECK: [[CMP:%.*]] = fcmp ole <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_unord_q(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_unord_q - // CHECK: [[CMP:%.*]] = fcmp uno <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x 
float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_uq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_uq - // CHECK: [[CMP:%.*]] = fcmp une <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_nlt_us(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nlt_us - // CHECK: [[CMP:%.*]] = fcmp uge <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm256_mask_cmp_ps_mask_nle_us(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nle_us - // CHECK: [[CMP:%.*]] = fcmp ugt <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm256_mask_cmp_ps_mask_ord_q(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ord_q - // CHECK: [[CMP:%.*]] = fcmp ord <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_uq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_uq - // CHECK: [[CMP:%.*]] = fcmp ueq <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_nge_us(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nge_us - // CHECK: [[CMP:%.*]] = fcmp ult <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 
return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm256_mask_cmp_ps_mask_ngt_us(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ngt_us - // CHECK: [[CMP:%.*]] = fcmp ule <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm256_mask_cmp_ps_mask_false_oq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_false_oq - // CHECK: [[CMP:%.*]] = fcmp false <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_oq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_oq - // CHECK: [[CMP:%.*]] = fcmp one <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_ge_os(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ge_os - // CHECK: [[CMP:%.*]] = fcmp oge <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_gt_os(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_gt_os - // CHECK: [[CMP:%.*]] = fcmp ogt <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_true_uq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_true_uq - // CHECK: [[CMP:%.*]] = fcmp true <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_true_uq + // X64: entry: + // X64-NEXT: ret i8 %m return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_os(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_os - // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast 
<8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_lt_oq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_lt_oq - // CHECK: [[CMP:%.*]] = fcmp olt <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_le_oq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_le_oq - // CHECK: [[CMP:%.*]] = fcmp ole <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_unord_s(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_unord_s - // CHECK: [[CMP:%.*]] = fcmp uno <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_us(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_us - // CHECK: [[CMP:%.*]] = fcmp une <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm256_mask_cmp_ps_mask_nlt_uq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nlt_uq - // CHECK: [[CMP:%.*]] = fcmp uge <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_nle_uq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nle_uq - // CHECK: [[CMP:%.*]] = fcmp ugt <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_ord_s(__mmask8 m, __m256 a, 
__m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ord_s - // CHECK: [[CMP:%.*]] = fcmp ord <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm256_mask_cmp_ps_mask_eq_us(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_us - // CHECK: [[CMP:%.*]] = fcmp ueq <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm256_mask_cmp_ps_mask_nge_uq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nge_uq - // CHECK: [[CMP:%.*]] = fcmp ult <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_ngt_uq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ngt_uq - // CHECK: [[CMP:%.*]] = fcmp ule <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_mask_cmp_ps_mask_false_os(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_false_os - // CHECK: [[CMP:%.*]] = fcmp false <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_neq_os(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_os - // CHECK: [[CMP:%.*]] = fcmp one <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_mask_cmp_ps_mask_ge_oq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ge_oq - // CHECK: [[CMP:%.*]] = fcmp oge <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // 
X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_gt_oq(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_gt_oq - // CHECK: [[CMP:%.*]] = fcmp ogt <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <8 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %2 = and <8 x i1> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm256_mask_cmp_ps_mask_true_us(__mmask8 m, __m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_true_us - // CHECK: [[CMP:%.*]] = fcmp true <8 x float> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_ps_mask_true_us + // X64: entry: + // X64-NEXT: ret i8 %m return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm256_cmp_pd_mask_eq_oq(__m256d a, __m256d b) { - // CHECK-LABEL: @test_mm256_cmp_pd_mask_eq_oq - // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm256_cmp_pd_mask_lt_os(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_lt_os - // CHECK: fcmp olt <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm256_cmp_pd_mask_le_os(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_le_os - // CHECK: fcmp ole <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm256_cmp_pd_mask_unord_q(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_unord_q - // CHECK: fcmp uno <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_cmp_pd_mask_neq_uq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_uq - // CHECK: fcmp une <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_cmp_pd_mask_nlt_us(__m256d a, __m256d b) { - // 
CHECK-LABEL: test_mm256_cmp_pd_mask_nlt_us - // CHECK: fcmp uge <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm256_cmp_pd_mask_nle_us(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_nle_us - // CHECK: fcmp ugt <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm256_cmp_pd_mask_ord_q(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_ord_q - // CHECK: fcmp ord <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm256_cmp_pd_mask_eq_uq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_uq - // CHECK: fcmp ueq <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_cmp_pd_mask_nge_us(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_nge_us - // CHECK: fcmp ult <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm256_cmp_pd_mask_ngt_us(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_ngt_us - // CHECK: fcmp ule <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm256_cmp_pd_mask_false_oq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_false_oq - // CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm256_cmp_pd_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_cmp_pd_mask_neq_oq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_oq - // CHECK: fcmp one <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, 
_CMP_NEQ_OQ); } __mmask8 test_mm256_cmp_pd_mask_ge_os(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_ge_os - // CHECK: fcmp oge <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm256_cmp_pd_mask_gt_os(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_gt_os - // CHECK: fcmp ogt <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm256_cmp_pd_mask_true_uq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_true_uq - // CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_true_uq + // X64: entry: + // X64-NEXT: ret i8 15 return _mm256_cmp_pd_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_cmp_pd_mask_eq_os(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_os - // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm256_cmp_pd_mask_lt_oq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_lt_oq - // CHECK: fcmp olt <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm256_cmp_pd_mask_le_oq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_le_oq - // CHECK: fcmp ole <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm256_cmp_pd_mask_unord_s(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_unord_s - // CHECK: fcmp uno <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm256_cmp_pd_mask_neq_us(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_us - // CHECK: fcmp une <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> 
%1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm256_cmp_pd_mask_nlt_uq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_nlt_uq - // CHECK: fcmp uge <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_cmp_pd_mask_nle_uq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_nle_uq - // CHECK: fcmp ugt <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_cmp_pd_mask_ord_s(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_ord_s - // CHECK: fcmp ord <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm256_cmp_pd_mask_eq_us(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_us - // CHECK: fcmp ueq <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm256_cmp_pd_mask_nge_uq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_nge_uq - // CHECK: fcmp ult <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_cmp_pd_mask_ngt_uq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_ngt_uq - // CHECK: fcmp ule <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_cmp_pd_mask_false_os(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_false_os - // CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm256_cmp_pd_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_cmp_pd_mask_neq_os(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_os - // CHECK: fcmp one <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> 
%0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_cmp_pd_mask_ge_oq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_ge_oq - // CHECK: fcmp oge <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm256_cmp_pd_mask_gt_oq(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_gt_oq - // CHECK: fcmp ogt <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x double> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_cmp_pd_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm256_cmp_pd_mask_true_us(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_cmp_pd_mask_true_us - // CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmp_pd_mask_true_us + // X64: entry: + // X64-NEXT: ret i8 15 return _mm256_cmp_pd_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask_eq_oq - // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_lt_os(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_lt_os - // CHECK: [[CMP:%.*]] = fcmp olt <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_le_os(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_le_os - // CHECK: [[CMP:%.*]] = fcmp ole <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 
return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_unord_q(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_unord_q - // CHECK: [[CMP:%.*]] = fcmp uno <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_uq - // CHECK: [[CMP:%.*]] = fcmp une <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nlt_us - // CHECK: [[CMP:%.*]] = fcmp uge <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm256_mask_cmp_pd_mask_nle_us(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nle_us - // CHECK: [[CMP:%.*]] = fcmp ugt <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm256_mask_cmp_pd_mask_ord_q(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ord_q - // CHECK: [[CMP:%.*]] = fcmp ord <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = 
shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_uq - // CHECK: [[CMP:%.*]] = fcmp ueq <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_nge_us(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nge_us - // CHECK: [[CMP:%.*]] = fcmp ult <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm256_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ngt_us - // CHECK: [[CMP:%.*]] = fcmp ule <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm256_mask_cmp_pd_mask_false_oq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_false_oq - // CHECK: [[CMP:%.*]] = fcmp false <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_oq - // CHECK: [[CMP:%.*]] = fcmp one <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 
test_mm256_mask_cmp_pd_mask_ge_os(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ge_os - // CHECK: [[CMP:%.*]] = fcmp oge <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_gt_os(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_gt_os - // CHECK: [[CMP:%.*]] = fcmp ogt <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_true_uq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_true_uq - // CHECK: [[CMP:%.*]] = fcmp true <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_true_uq + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = shufflevector <4 x i1> %extract, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_os(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_os - // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_lt_oq - // CHECK: [[CMP:%.*]] = fcmp olt <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, 
a, b, _CMP_LT_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_le_oq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_le_oq - // CHECK: [[CMP:%.*]] = fcmp ole <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_unord_s(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_unord_s - // CHECK: [[CMP:%.*]] = fcmp uno <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_us(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_us - // CHECK: [[CMP:%.*]] = fcmp une <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm256_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nlt_uq - // CHECK: [[CMP:%.*]] = fcmp uge <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nle_uq - // CHECK: [[CMP:%.*]] = fcmp ugt <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_ord_s(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ord_s - // CHECK: [[CMP:%.*]] = fcmp ord <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm256_mask_cmp_pd_mask_eq_us(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_us - // CHECK: [[CMP:%.*]] = fcmp ueq <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm256_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nge_uq - // CHECK: [[CMP:%.*]] = fcmp ult <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ngt_uq - // CHECK: [[CMP:%.*]] = fcmp ule <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm256_mask_cmp_pd_mask_false_os(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_false_os - // CHECK: [[CMP:%.*]] = fcmp false <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_neq_os(__mmask8 m, __m256d 
a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_os - // CHECK: [[CMP:%.*]] = fcmp one <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm256_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ge_oq - // CHECK: [[CMP:%.*]] = fcmp oge <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_gt_oq - // CHECK: [[CMP:%.*]] = fcmp ogt <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm256_mask_cmp_pd_mask_true_us(__mmask8 m, __m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_true_us - // CHECK: [[CMP:%.*]] = fcmp true <4 x double> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm256_mask_cmp_pd_mask_true_us + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = shufflevector <4 x i1> %extract, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm_cmp_ps_mask_eq_oq(__m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_cmp_ps_mask_eq_oq - // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm_cmp_ps_mask_lt_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_lt_os - // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, 
<4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm_cmp_ps_mask_le_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_le_os - // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm_cmp_ps_mask_unord_q(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_unord_q - // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm_cmp_ps_mask_neq_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_neq_uq - // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_cmp_ps_mask_nlt_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_nlt_us - // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm_cmp_ps_mask_nle_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_nle_us - // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm_cmp_ps_mask_ord_q(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_ord_q - // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm_cmp_ps_mask_eq_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_eq_uq - // CHECK: fcmp ueq <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm_cmp_ps_mask_nge_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_nge_us - // CHECK: fcmp ult <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: 
test_mm_cmp_ps_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm_cmp_ps_mask_ngt_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_ngt_us - // CHECK: fcmp ule <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm_cmp_ps_mask_false_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_false_oq - // CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_cmp_ps_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_cmp_ps_mask_neq_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_neq_oq - // CHECK: fcmp one <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_cmp_ps_mask_ge_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_ge_os - // CHECK: fcmp oge <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm_cmp_ps_mask_gt_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_gt_os - // CHECK: fcmp ogt <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm_cmp_ps_mask_true_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_true_uq - // CHECK: fcmp true <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_true_uq + // X64: entry: + // X64-NEXT: ret i8 15 return _mm_cmp_ps_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_cmp_ps_mask_eq_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_eq_os - // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm_cmp_ps_mask_lt_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_lt_oq - // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // 
X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm_cmp_ps_mask_le_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_le_oq - // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm_cmp_ps_mask_unord_s(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_unord_s - // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm_cmp_ps_mask_neq_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_neq_us - // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm_cmp_ps_mask_nlt_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_nlt_uq - // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm_cmp_ps_mask_nle_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_nle_uq - // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm_cmp_ps_mask_ord_s(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_ord_s - // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_ORD_S); } __mmask8 test_mm_cmp_ps_mask_eq_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_eq_us - // CHECK: fcmp ueq <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm_cmp_ps_mask_nge_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_nge_uq - // CHECK: fcmp ult <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x float> %a, %b + // X64-NEXT: %1 
= shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm_cmp_ps_mask_ngt_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_ngt_uq - // CHECK: fcmp ule <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm_cmp_ps_mask_false_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_false_os - // CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_cmp_ps_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm_cmp_ps_mask_neq_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_neq_os - // CHECK: fcmp one <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm_cmp_ps_mask_ge_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_ge_oq - // CHECK: fcmp oge <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm_cmp_ps_mask_gt_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_gt_oq - // CHECK: fcmp ogt <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x float> %a, %b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_ps_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm_cmp_ps_mask_true_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_mask_true_us - // CHECK: fcmp true <4 x float> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_ps_mask_true_us + // X64: entry: + // X64-NEXT: ret i8 15 return _mm_cmp_ps_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm_mask_cmp_ps_mask_eq_oq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_mask_cmp_ps_mask_eq_oq - // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_lt_os(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_lt_os - // CHECK: [[CMP:%.*]] = fcmp olt <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: 
test_mm_mask_cmp_ps_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm_mask_cmp_ps_mask_le_os(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_le_os - // CHECK: [[CMP:%.*]] = fcmp ole <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm_mask_cmp_ps_mask_unord_q(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_unord_q - // CHECK: [[CMP:%.*]] = fcmp uno <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm_mask_cmp_ps_mask_neq_uq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_uq - // CHECK: [[CMP:%.*]] = fcmp une <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_nlt_us(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nlt_us - // CHECK: [[CMP:%.*]] = fcmp uge <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm_mask_cmp_ps_mask_nle_us(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nle_us - // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float> %{{.*}}, %{{.*}} - // 
CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm_mask_cmp_ps_mask_ord_q(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ord_q - // CHECK: [[CMP:%.*]] = fcmp ord <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm_mask_cmp_ps_mask_eq_uq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_uq - // CHECK: [[CMP:%.*]] = fcmp ueq <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_nge_us(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nge_us - // CHECK: [[CMP:%.*]] = fcmp ult <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US); } __mmask8 test_mm_mask_cmp_ps_mask_ngt_us(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ngt_us - // CHECK: [[CMP:%.*]] = fcmp ule <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm_mask_cmp_ps_mask_false_oq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_false_oq - // CHECK: 
[[CMP:%.*]] = fcmp false <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_neq_oq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_oq - // CHECK: [[CMP:%.*]] = fcmp one <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_ge_os(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ge_os - // CHECK: [[CMP:%.*]] = fcmp oge <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm_mask_cmp_ps_mask_gt_os(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_gt_os - // CHECK: [[CMP:%.*]] = fcmp ogt <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm_mask_cmp_ps_mask_true_uq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_true_uq - // CHECK: [[CMP:%.*]] = fcmp true <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_true_uq + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = shufflevector <4 x i1> %extract, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_eq_os(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_os - // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // 
X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm_mask_cmp_ps_mask_lt_oq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_lt_oq - // CHECK: [[CMP:%.*]] = fcmp olt <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_le_oq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_le_oq - // CHECK: [[CMP:%.*]] = fcmp ole <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_unord_s(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_unord_s - // CHECK: [[CMP:%.*]] = fcmp uno <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm_mask_cmp_ps_mask_neq_us(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_us - // CHECK: [[CMP:%.*]] = fcmp une <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm_mask_cmp_ps_mask_nlt_uq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nlt_uq - // CHECK: [[CMP:%.*]] = fcmp uge <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // 
X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_nle_uq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nle_uq - // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_ord_s(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ord_s - // CHECK: [[CMP:%.*]] = fcmp ord <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm_mask_cmp_ps_mask_eq_us(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_us - // CHECK: [[CMP:%.*]] = fcmp ueq <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm_mask_cmp_ps_mask_nge_uq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nge_uq - // CHECK: [[CMP:%.*]] = fcmp ult <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_ngt_uq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ngt_uq - // CHECK: [[CMP:%.*]] = fcmp ule <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector 
<8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm_mask_cmp_ps_mask_false_os(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_false_os - // CHECK: [[CMP:%.*]] = fcmp false <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm_mask_cmp_ps_mask_neq_os(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_os - // CHECK: [[CMP:%.*]] = fcmp one <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm_mask_cmp_ps_mask_ge_oq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ge_oq - // CHECK: [[CMP:%.*]] = fcmp oge <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_gt_oq(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_gt_oq - // CHECK: [[CMP:%.*]] = fcmp ogt <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <4 x float> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm_mask_cmp_ps_mask_true_us(__mmask8 m, __m128 a, __m128 b) { - // CHECK-LABEL: test_mm_mask_cmp_ps_mask_true_us - // CHECK: [[CMP:%.*]] = fcmp true <4 x float> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_ps_mask_true_us + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = shufflevector <4 x i1> %extract, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US); } __mmask8 test_mm_cmp_pd_mask_eq_oq(__m128d a, __m128d b) { - // CHECK-LABEL: 
@test_mm_cmp_pd_mask_eq_oq - // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_EQ_OQ); } __mmask8 test_mm_cmp_pd_mask_lt_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_lt_os - // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_LT_OS); } __mmask8 test_mm_cmp_pd_mask_le_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_le_os - // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_LE_OS); } __mmask8 test_mm_cmp_pd_mask_unord_q(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_unord_q - // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_UNORD_Q); } __mmask8 test_mm_cmp_pd_mask_neq_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_neq_uq - // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_cmp_pd_mask_nlt_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_nlt_us - // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NLT_US); } __mmask8 test_mm_cmp_pd_mask_nle_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_nle_us - // CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NLE_US); } __mmask8 test_mm_cmp_pd_mask_ord_q(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_ord_q - // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 
+ // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_ORD_Q); } __mmask8 test_mm_cmp_pd_mask_eq_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_eq_uq - // CHECK: fcmp ueq <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_EQ_UQ); } __mmask8 test_mm_cmp_pd_mask_nge_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_nge_us - // CHECK: fcmp ult <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NGE_US); } __mmask8 test_mm_cmp_pd_mask_ngt_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_ngt_us - // CHECK: fcmp ule <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NGT_US); } __mmask8 test_mm_cmp_pd_mask_false_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_false_oq - // CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_cmp_pd_mask(a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_cmp_pd_mask_neq_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_neq_oq - // CHECK: fcmp one <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_cmp_pd_mask_ge_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_ge_os - // CHECK: fcmp oge <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_GE_OS); } __mmask8 test_mm_cmp_pd_mask_gt_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_gt_os - // CHECK: fcmp ogt <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_GT_OS); } __mmask8 test_mm_cmp_pd_mask_true_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_true_uq - // CHECK: fcmp true <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_true_uq + // X64: entry: + // X64-NEXT: ret i8 3 return _mm_cmp_pd_mask(a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_cmp_pd_mask_eq_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_eq_os - // CHECK: fcmp oeq <2 x 
double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_EQ_OS); } __mmask8 test_mm_cmp_pd_mask_lt_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_lt_oq - // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_LT_OQ); } __mmask8 test_mm_cmp_pd_mask_le_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_le_oq - // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_LE_OQ); } __mmask8 test_mm_cmp_pd_mask_unord_s(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_unord_s - // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_UNORD_S); } __mmask8 test_mm_cmp_pd_mask_neq_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_neq_us - // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_neq_us + // X64: entry: + // X64-NEXT: %0 = fcmp une <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NEQ_US); } __mmask8 test_mm_cmp_pd_mask_nlt_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_nlt_uq - // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NLT_UQ); } __mmask8 test_mm_cmp_pd_mask_nle_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_nle_uq - // CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NLE_UQ); } __mmask8 test_mm_cmp_pd_mask_ord_s(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_ord_s - // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, 
b, _CMP_ORD_S); } __mmask8 test_mm_cmp_pd_mask_eq_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_eq_us - // CHECK: fcmp ueq <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_EQ_US); } __mmask8 test_mm_cmp_pd_mask_nge_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_nge_uq - // CHECK: fcmp ult <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NGE_UQ); } __mmask8 test_mm_cmp_pd_mask_ngt_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_ngt_uq - // CHECK: fcmp ule <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NGT_UQ); } __mmask8 test_mm_cmp_pd_mask_false_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_false_os - // CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_cmp_pd_mask(a, b, _CMP_FALSE_OS); } __mmask8 test_mm_cmp_pd_mask_neq_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_neq_os - // CHECK: fcmp one <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_NEQ_OS); } __mmask8 test_mm_cmp_pd_mask_ge_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_ge_oq - // CHECK: fcmp oge <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_GE_OQ); } __mmask8 test_mm_cmp_pd_mask_gt_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_gt_oq - // CHECK: fcmp ogt <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <2 x double> %a, %b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_cmp_pd_mask(a, b, _CMP_GT_OQ); } __mmask8 test_mm_cmp_pd_mask_true_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_mask_true_us - // CHECK: fcmp true <2 x double> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmp_pd_mask_true_us + // X64: entry: + // X64-NEXT: ret i8 3 return _mm_cmp_pd_mask(a, b, _CMP_TRUE_US); } __mmask8 test_mm_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_mask_cmp_pd_mask_eq_oq - // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double> 
%{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_eq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_lt_os(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_lt_os - // CHECK: [[CMP:%.*]] = fcmp olt <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_lt_os + // X64: entry: + // X64-NEXT: %0 = fcmp olt <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS); } __mmask8 test_mm_mask_cmp_pd_mask_le_os(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_le_os - // CHECK: [[CMP:%.*]] = fcmp ole <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_le_os + // X64: entry: + // X64-NEXT: %0 = fcmp ole <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS); } __mmask8 test_mm_mask_cmp_pd_mask_unord_q(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_unord_q - // CHECK: [[CMP:%.*]] = fcmp uno <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_unord_q + // X64: entry: + // X64-NEXT: %0 = fcmp uno <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q); } __mmask8 test_mm_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_uq - // CHECK: [[CMP:%.*]] = fcmp une <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_neq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp une <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: 
test_mm_mask_cmp_pd_mask_nlt_us - // CHECK: [[CMP:%.*]] = fcmp uge <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_nlt_us + // X64: entry: + // X64-NEXT: %0 = fcmp uge <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US); } __mmask8 test_mm_mask_cmp_pd_mask_nle_us(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nle_us - // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_nle_us + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US); } __mmask8 test_mm_mask_cmp_pd_mask_ord_q(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ord_q - // CHECK: [[CMP:%.*]] = fcmp ord <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_ord_q + // X64: entry: + // X64-NEXT: %0 = fcmp ord <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q); } __mmask8 test_mm_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_uq - // CHECK: [[CMP:%.*]] = fcmp ueq <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_eq_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_nge_us(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nge_us - // CHECK: [[CMP:%.*]] = fcmp ult <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_nge_us + // X64: entry: + // X64-NEXT: %0 = fcmp ult <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US); } __mmask8 
test_mm_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ngt_us - // CHECK: [[CMP:%.*]] = fcmp ule <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_ngt_us + // X64: entry: + // X64-NEXT: %0 = fcmp ule <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US); } __mmask8 test_mm_mask_cmp_pd_mask_false_oq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_false_oq - // CHECK: [[CMP:%.*]] = fcmp false <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_false_oq + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_oq - // CHECK: [[CMP:%.*]] = fcmp one <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_neq_oq + // X64: entry: + // X64-NEXT: %0 = fcmp one <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_ge_os(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ge_os - // CHECK: [[CMP:%.*]] = fcmp oge <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_ge_os + // X64: entry: + // X64-NEXT: %0 = fcmp oge <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS); } __mmask8 test_mm_mask_cmp_pd_mask_gt_os(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_gt_os - // CHECK: [[CMP:%.*]] = fcmp ogt <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_gt_os + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS); } __mmask8 test_mm_mask_cmp_pd_mask_true_uq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_true_uq - // CHECK: [[CMP:%.*]] = fcmp true <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_true_uq + 
// X64: entry: + // X64-NEXT: %0 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = shufflevector <2 x i1> %extract, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_eq_os(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_os - // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_eq_os + // X64: entry: + // X64-NEXT: %0 = fcmp oeq <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS); } __mmask8 test_mm_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_lt_oq - // CHECK: [[CMP:%.*]] = fcmp olt <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_lt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp olt <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_le_oq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_le_oq - // CHECK: [[CMP:%.*]] = fcmp ole <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_le_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ole <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_unord_s(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_unord_s - // CHECK: [[CMP:%.*]] = fcmp uno <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_unord_s + // X64: entry: + // X64-NEXT: %0 = fcmp uno <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S); } __mmask8 test_mm_mask_cmp_pd_mask_neq_us(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_us - // CHECK: [[CMP:%.*]] = fcmp une <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_neq_us + // X64: entry: + // 
X64-NEXT: %0 = fcmp une <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US); } __mmask8 test_mm_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nlt_uq - // CHECK: [[CMP:%.*]] = fcmp uge <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_nlt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp uge <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nle_uq - // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_nle_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ugt <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_ord_s(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ord_s - // CHECK: [[CMP:%.*]] = fcmp ord <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_ord_s + // X64: entry: + // X64-NEXT: %0 = fcmp ord <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S); } __mmask8 test_mm_mask_cmp_pd_mask_eq_us(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_us - // CHECK: [[CMP:%.*]] = fcmp ueq <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_eq_us + // X64: entry: + // X64-NEXT: %0 = fcmp ueq <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US); } __mmask8 test_mm_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nge_uq - // CHECK: [[CMP:%.*]] = fcmp ult <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // 
X64-LABEL: test_mm_mask_cmp_pd_mask_nge_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ult <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ngt_uq - // CHECK: [[CMP:%.*]] = fcmp ule <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_ngt_uq + // X64: entry: + // X64-NEXT: %0 = fcmp ule <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ); } __mmask8 test_mm_mask_cmp_pd_mask_false_os(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_false_os - // CHECK: [[CMP:%.*]] = fcmp false <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_false_os + // X64: entry: + // X64-NEXT: ret i8 0 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS); } __mmask8 test_mm_mask_cmp_pd_mask_neq_os(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_os - // CHECK: [[CMP:%.*]] = fcmp one <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_neq_os + // X64: entry: + // X64-NEXT: %0 = fcmp one <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS); } __mmask8 test_mm_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ge_oq - // CHECK: [[CMP:%.*]] = fcmp oge <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_ge_oq + // X64: entry: + // X64-NEXT: %0 = fcmp oge <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_gt_oq - // CHECK: [[CMP:%.*]] = fcmp ogt <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_gt_oq + // X64: entry: + // X64-NEXT: %0 = fcmp ogt <2 x double> %a, %b + // X64-NEXT: %1 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, 
%extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ); } __mmask8 test_mm_mask_cmp_pd_mask_true_us(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: test_mm_mask_cmp_pd_mask_true_us - // CHECK: [[CMP:%.*]] = fcmp true <2 x double> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> [[CMP]], {{.*}} + // X64-LABEL: test_mm_mask_cmp_pd_mask_true_us + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %m to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = shufflevector <2 x i1> %extract, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US); } __m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_mask_fmadd_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__A + // X64-NEXT: ret <2 x double> %2 return _mm_mask_fmadd_pd(__A, __U, __B, __C); } __m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_mask_fmsub_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__A + // X64-NEXT: ret <2 x double> %2 return _mm_mask_fmsub_pd(__A, __U, __B, __C); } __m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmadd_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask3_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, 
<2 x double> %__C + // X64-NEXT: ret <2 x double> %2 return _mm_mask3_fmadd_pd(__A, __B, __C, __U); } __m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fnmadd_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask3_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__A + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__C + // X64-NEXT: ret <2 x double> %2 return _mm_mask3_fnmadd_pd(__A, __B, __C, __U); } __m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_maskz_fmadd_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_fmadd_pd(__U, __A, __B, __C); } __m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_maskz_fmsub_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_fmsub_pd(__U, __A, __B, __C); } __m128d test_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_maskz_fnmadd_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_fnmadd_pd + // X64: 
entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__A + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_fnmadd_pd(__U, __A, __B, __C); } __m128d test_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_maskz_fnmsub_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__A + // X64-NEXT: %sub1.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %__B, <2 x double> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_fnmsub_pd(__U, __A, __B, __C); } __m256d test_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_mask_fmadd_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__A + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_fmadd_pd(__A, __U, __B, __C); } __m256d test_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_mask_fmsub_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__A + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_fmsub_pd(__A, __U, __B, __C); } __m256d test_mm256_mask3_fmadd_pd(__m256d 
__A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fmadd_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__C + // X64-NEXT: ret <4 x double> %2 return _mm256_mask3_fmadd_pd(__A, __B, __C, __U); } __m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fnmadd_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask3_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__A + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__C + // X64-NEXT: ret <4 x double> %2 return _mm256_mask3_fnmadd_pd(__A, __B, __C, __U); } __m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_maskz_fmadd_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmadd_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_fmadd_pd(__U, __A, __B, __C); } __m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_maskz_fmsub_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %sub.i) #9 + // X64-NEXT: %1 = 
bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_fmsub_pd(__U, __A, __B, __C); } __m256d test_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_maskz_fnmadd_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__A + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_fnmadd_pd(__U, __A, __B, __C); } __m256d test_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_maskz_fnmsub_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__A + // X64-NEXT: %sub1.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %__B, <4 x double> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_fnmsub_pd(__U, __A, __B, __C); } __m128 test_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_mask_fmadd_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__A + // X64-NEXT: ret <4 x float> %2 return _mm_mask_fmadd_ps(__A, __U, __B, __C); } __m128 test_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_mask_fmsub_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> 
%{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__A + // X64-NEXT: ret <4 x float> %2 return _mm_mask_fmsub_ps(__A, __U, __B, __C); } __m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmadd_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask3_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__C + // X64-NEXT: ret <4 x float> %2 return _mm_mask3_fmadd_ps(__A, __B, __C, __U); } __m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fnmadd_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask3_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__A + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__C + // X64-NEXT: ret <4 x float> %2 return _mm_mask3_fnmadd_ps(__A, __B, __C, __U); } __m128 test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_maskz_fmadd_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_fmadd_ps(__U, __A, __B, __C); } __m128 test_mm_maskz_fmsub_ps(__mmask8 __U, 
__m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_maskz_fmsub_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_fmsub_ps(__U, __A, __B, __C); } __m128 test_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_maskz_fnmadd_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__A + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_fnmadd_ps(__U, __A, __B, __C); } __m128 test_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_maskz_fnmsub_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__A + // X64-NEXT: %sub1.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %__B, <4 x float> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_fnmsub_ps(__U, __A, __B, __C); } __m256 test_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_mask_fmadd_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + 
// X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__A + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_fmadd_ps(__A, __U, __B, __C); } __m256 test_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_mask_fmsub_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__A + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_fmsub_ps(__A, __U, __B, __C); } __m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fmadd_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__C + // X64-NEXT: ret <8 x float> %2 return _mm256_mask3_fmadd_ps(__A, __B, __C, __U); } __m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fnmadd_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask3_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__A + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__C + // X64-NEXT: ret <8 x float> %2 return _mm256_mask3_fnmadd_ps(__A, __B, __C, __U); } __m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_maskz_fmadd_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmadd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_fmadd_ps(__U, __A, __B, __C); } __m256 test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_maskz_fmsub_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> 
@llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_fmsub_ps(__U, __A, __B, __C); } __m256 test_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_maskz_fnmadd_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__A + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_fnmadd_ps(__U, __A, __B, __C); } __m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_maskz_fnmsub_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__A + // X64-NEXT: %sub1.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %__B, <8 x float> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_fnmsub_ps(__U, __A, __B, __C); } __m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_mask_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]] - // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = fsub <2 x double> , %__C + // X64-NEXT: %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %1) #9 + // X64-NEXT: %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> + // 
X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <2 x i32> + // X64-NEXT: %5 = select <2 x i1> %extract.i, <2 x double> %3, <2 x double> %__A + // X64-NEXT: ret <2 x double> %5 return _mm_mask_fmaddsub_pd(__A, __U, __B, __C); } __m128d test_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_mask_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub.i) #9 + // X64-NEXT: %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> + // X64-NEXT: %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__A + // X64-NEXT: ret <2 x double> %4 return _mm_mask_fmsubadd_pd(__A, __U, __B, __C); } __m128d test_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]] - // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask3_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = fsub <2 x double> , %__C + // X64-NEXT: %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %1) #9 + // X64-NEXT: %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <2 x i32> + // X64-NEXT: %5 = select <2 x i1> %extract.i, <2 x double> %3, <2 x double> %__C + // X64-NEXT: ret <2 x double> %5 return _mm_mask3_fmaddsub_pd(__A, __B, __C, __U); } __m128d test_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_maskz_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, 
<2 x double> %{{.*}}, <2 x double> [[NEG]] - // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %1 = fsub <2 x double> , %__C + // X64-NEXT: %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %1) #9 + // X64-NEXT: %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <2 x i32> + // X64-NEXT: %5 = select <2 x i1> %extract.i, <2 x double> %3, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %5 return _mm_maskz_fmaddsub_pd(__U, __A, __B, __C); } __m128d test_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_maskz_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub.i) #9 + // X64-NEXT: %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> + // X64-NEXT: %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %4 return _mm_maskz_fmsubadd_pd(__U, __A, __B, __C); } __m256d test_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_mask_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <4 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = fsub <4 x double> , %__C + // X64-NEXT: %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %1) 
#9 + // X64-NEXT: %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x double> %3, <4 x double> %__A + // X64-NEXT: ret <4 x double> %5 return _mm256_mask_fmaddsub_pd(__A, __U, __B, __C); } __m256d test_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_mask_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <4 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %sub.i) #9 + // X64-NEXT: %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__A + // X64-NEXT: ret <4 x double> %4 return _mm256_mask_fmsubadd_pd(__A, __U, __B, __C); } __m256d test_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <4 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = fsub <4 x double> , %__C + // X64-NEXT: %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %1) #9 + // X64-NEXT: %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x double> %3, <4 x double> %__C + // X64-NEXT: ret <4 x double> %5 return _mm256_mask3_fmaddsub_pd(__A, __B, __C, __U); } __m256d test_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_maskz_fmaddsub_pd - // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: 
[[NEG:%.+]] = fsub <4 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmaddsub_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %1 = fsub <4 x double> , %__C + // X64-NEXT: %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %1) #9 + // X64-NEXT: %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x double> %3, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %5 return _mm256_maskz_fmaddsub_pd(__U, __A, __B, __C); } __m256d test_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_maskz_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <4 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %sub.i) #9 + // X64-NEXT: %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %4 return _mm256_maskz_fmsubadd_pd(__U, __A, __B, __C); } __m128 test_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_mask_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] - // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = fsub <4 x float> , %__C + 
// X64-NEXT: %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %1) #9 + // X64-NEXT: %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x float> %3, <4 x float> %__A + // X64-NEXT: ret <4 x float> %5 return _mm_mask_fmaddsub_ps(__A, __U, __B, __C); } __m128 test_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_mask_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub.i) #9 + // X64-NEXT: %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__A + // X64-NEXT: ret <4 x float> %4 return _mm_mask_fmsubadd_ps(__A, __U, __B, __C); } __m128 test_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] - // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask3_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = fsub <4 x float> , %__C + // X64-NEXT: %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %1) #9 + // X64-NEXT: %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x float> %3, <4 x float> %__C + // X64-NEXT: ret <4 x float> %5 return _mm_mask3_fmaddsub_ps(__A, __B, __C, __U); } __m128 test_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_maskz_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: 
[[NEG:%.+]] = fsub <4 x float> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] - // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %1 = fsub <4 x float> , %__C + // X64-NEXT: %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %1) #9 + // X64-NEXT: %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x float> %3, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %5 return _mm_maskz_fmaddsub_ps(__U, __A, __B, __C); } __m128 test_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_maskz_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub.i) #9 + // X64-NEXT: %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %4 return _mm_maskz_fmsubadd_ps(__U, __A, __B, __C); } __m256 test_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_mask_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <8 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] - // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %1 = fsub <8 x float> , %__C + // X64-NEXT: %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %1) #9 + // X64-NEXT: %3 = 
shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %__A + // X64-NEXT: ret <8 x float> %5 return _mm256_mask_fmaddsub_ps(__A, __U, __B, __C); } __m256 test_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_mask_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <8 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %sub.i) #9 + // X64-NEXT: %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__A + // X64-NEXT: ret <8 x float> %4 return _mm256_mask_fmsubadd_ps(__A, __U, __B, __C); } __m256 test_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <8 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] - // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %1 = fsub <8 x float> , %__C + // X64-NEXT: %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %1) #9 + // X64-NEXT: %3 = shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %__C + // X64-NEXT: ret <8 x float> %5 return _mm256_mask3_fmaddsub_ps(__A, __B, __C, __U); } __m256 test_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_maskz_fmaddsub_ps - // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: [[NEG:%.+]] = fsub <8 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] - // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmaddsub_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> 
%__B, <8 x float> %__C) #9 + // X64-NEXT: %1 = fsub <8 x float> , %__C + // X64-NEXT: %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %1) #9 + // X64-NEXT: %3 = shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %5 return _mm256_maskz_fmaddsub_ps(__U, __A, __B, __C); } __m256 test_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_maskz_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <8 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %sub.i) #9 + // X64-NEXT: %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %4 return _mm256_maskz_fmsubadd_ps(__U, __A, __B, __C); } __m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmsub_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask3_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__C + // X64-NEXT: ret <2 x double> %2 return _mm_mask3_fmsub_pd(__A, __B, __C, __U); } __m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fmsub_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x 
i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__C + // X64-NEXT: ret <4 x double> %2 return _mm256_mask3_fmsub_pd(__A, __B, __C, __U); } __m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmsub_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask3_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__C + // X64-NEXT: ret <4 x float> %2 return _mm_mask3_fmsub_ps(__A, __B, __C, __U); } __m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fmsub_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %sub.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__C + // X64-NEXT: ret <8 x float> %2 return _mm256_mask3_fmsub_ps(__A, __B, __C, __U); } __m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask3_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub.i) #9 + // X64-NEXT: %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C) #9 + // X64-NEXT: %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> + // X64-NEXT: %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__C + // X64-NEXT: ret <2 x double> %4 return _mm_mask3_fmsubadd_pd(__A, __B, __C, __U); } __m256d test_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // 
CHECK-LABEL: @test_mm256_mask3_fmsubadd_pd - // CHECK: [[NEG:%.+]] = fsub <4 x double> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]] - // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmsubadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %sub.i) #9 + // X64-NEXT: %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C) #9 + // X64-NEXT: %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__C + // X64-NEXT: ret <4 x double> %4 return _mm256_mask3_fmsubadd_pd(__A, __B, __C, __U); } __m128 test_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} - // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask3_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub.i) #9 + // X64-NEXT: %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C) #9 + // X64-NEXT: %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__C + // X64-NEXT: ret <4 x float> %4 return _mm_mask3_fmsubadd_ps(__A, __B, __C, __U); } __m256 test_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fmsubadd_ps - // CHECK: [[NEG:%.+]] = fsub <8 x float> , %{{.*}} - // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] - // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask3_fmsubadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> 
@llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %sub.i) #9 + // X64-NEXT: %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C) #9 + // X64-NEXT: %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__C + // X64-NEXT: ret <8 x float> %4 return _mm256_mask3_fmsubadd_ps(__A, __B, __C, __U); } __m128d test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_mask_fnmadd_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__B + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %sub.i, <2 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__A + // X64-NEXT: ret <2 x double> %2 return _mm_mask_fnmadd_pd(__A, __U, __B, __C); } __m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_mask_fnmadd_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_fnmadd_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__B + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %sub.i, <4 x double> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__A + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_fnmadd_pd(__A, __U, __B, __C); } __m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_mask_fnmadd_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__B + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %sub.i, <4 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__A + // X64-NEXT: ret <4 x float> %2 return _mm_mask_fnmadd_ps(__A, __U, __B, __C); } __m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // 
CHECK-LABEL: @test_mm256_mask_fnmadd_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_fnmadd_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__B + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %sub.i, <8 x float> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__A + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_fnmadd_ps(__A, __U, __B, __C); } __m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: @test_mm_mask_fnmsub_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__B + // X64-NEXT: %sub1.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %sub.i, <2 x double> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__A + // X64-NEXT: ret <2 x double> %2 return _mm_mask_fnmsub_pd(__A, __U, __B, __C); } __m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fnmsub_pd - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: fsub <2 x double> , %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask3_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <2 x double> , %__B + // X64-NEXT: %sub1.i = fsub <2 x double> , %__C + // X64-NEXT: %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %__A, <2 x double> %sub.i, <2 x double> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__C + // X64-NEXT: ret <2 x double> %2 return _mm_mask3_fnmsub_pd(__A, __B, __C, __U); } __m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_mask_fnmsub_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__B + 
// X64-NEXT: %sub1.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %sub.i, <4 x double> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__A + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_fnmsub_pd(__A, __U, __B, __C); } __m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fnmsub_pd - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: fsub <4 x double> , %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask3_fnmsub_pd + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x double> , %__B + // X64-NEXT: %sub1.i = fsub <4 x double> , %__C + // X64-NEXT: %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %__A, <4 x double> %sub.i, <4 x double> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__C + // X64-NEXT: ret <4 x double> %2 return _mm256_mask3_fnmsub_pd(__A, __B, __C, __U); } __m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: @test_mm_mask_fnmsub_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__B + // X64-NEXT: %sub1.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %sub.i, <4 x float> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__A + // X64-NEXT: ret <4 x float> %2 return _mm_mask_fnmsub_ps(__A, __U, __B, __C); } __m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm_mask3_fnmsub_ps - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: fsub <4 x float> , %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask3_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <4 x float> , %__B + // X64-NEXT: %sub1.i = fsub <4 x float> , %__C + // X64-NEXT: %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %__A, <4 x float> %sub.i, <4 x float> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> 
+ // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__C + // X64-NEXT: ret <4 x float> %2 return _mm_mask3_fnmsub_ps(__A, __B, __C, __U); } __m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_mask_fnmsub_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__B + // X64-NEXT: %sub1.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %sub.i, <8 x float> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__A + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_fnmsub_ps(__A, __U, __B, __C); } __m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm256_mask3_fnmsub_ps - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: fsub <8 x float> , %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask3_fnmsub_ps + // X64: entry: + // X64-NEXT: %sub.i = fsub <8 x float> , %__B + // X64-NEXT: %sub1.i = fsub <8 x float> , %__C + // X64-NEXT: %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %__A, <8 x float> %sub.i, <8 x float> %sub1.i) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__C + // X64-NEXT: ret <8 x float> %2 return _mm256_mask3_fnmsub_ps(__A, __B, __C, __U); } __m128d test_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_add_pd - // CHECK: fadd <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_add_pd + // X64: entry: + // X64-NEXT: %add.i.i = fadd <2 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %add.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_add_pd(__W,__U,__A,__B); } __m128d test_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_add_pd - // CHECK: fadd <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_add_pd + // X64: entry: + // X64-NEXT: %add.i.i = fadd <2 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %add.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_add_pd(__U,__A,__B); } __m256d test_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_add_pd - // CHECK: fadd <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x 
double> %{{.*}} + // X64-LABEL: test_mm256_mask_add_pd + // X64: entry: + // X64-NEXT: %add.i.i = fadd <4 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %add.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_add_pd(__W,__U,__A,__B); } __m256d test_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_add_pd - // CHECK: fadd <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_add_pd + // X64: entry: + // X64-NEXT: %add.i.i = fadd <4 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %add.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_add_pd(__U,__A,__B); } __m128 test_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_add_ps - // CHECK: fadd <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_add_ps + // X64: entry: + // X64-NEXT: %add.i.i = fadd <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %add.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_add_ps(__W,__U,__A,__B); } __m128 test_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_add_ps - // CHECK: fadd <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_add_ps + // X64: entry: + // X64-NEXT: %add.i.i = fadd <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %add.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_add_ps(__U,__A,__B); } __m256 test_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_add_ps - // CHECK: fadd <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_add_ps + // X64: entry: + // X64-NEXT: %add.i.i = fadd <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %add.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_add_ps(__W,__U,__A,__B); } __m256 test_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_add_ps - // CHECK: fadd <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_add_ps + // X64: entry: + // X64-NEXT: %add.i.i = fadd <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %add.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_add_ps(__U,__A,__B); } __m128i test_mm_mask_blend_epi32(__mmask8 __U, 
__m128i __A, __m128i __W) { - // CHECK-LABEL: @test_mm_mask_blend_epi32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_blend_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_blend_epi32(__U,__A,__W); } __m256i test_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) { - // CHECK-LABEL: @test_mm256_mask_blend_epi32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_blend_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_blend_epi32(__U,__A,__W); } __m128d test_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) { - // CHECK-LABEL: @test_mm_mask_blend_pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_blend_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %__W, <2 x double> %__A + // X64-NEXT: ret <2 x double> %1 return _mm_mask_blend_pd(__U,__A,__W); } __m256d test_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) { - // CHECK-LABEL: @test_mm256_mask_blend_pd - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_blend_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %__W, <4 x double> %__A + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_blend_pd(__U,__A,__W); } __m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) { - // CHECK-LABEL: @test_mm_mask_blend_ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_blend_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %__W, <4 x float> %__A + // X64-NEXT: ret <4 x float> %1 return _mm_mask_blend_ps(__U,__A,__W); } __m256 test_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) { - // CHECK-LABEL: @test_mm256_mask_blend_ps - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_blend_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %__W, <8 x float> %__A + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_blend_ps(__U,__A,__W); } __m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) { - // CHECK-LABEL: @test_mm_mask_blend_epi64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: 
test_mm_mask_blend_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %__W, <2 x i64> %__A + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_blend_epi64(__U,__A,__W); } __m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) { - // CHECK-LABEL: @test_mm256_mask_blend_epi64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_blend_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %__W, <4 x i64> %__A + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_blend_epi64(__U,__A,__W); } __m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_compress_pd - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_mask_compress_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %__A, <2 x double> %__W, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x double> %1 return _mm_mask_compress_pd(__W,__U,__A); } __m128d test_mm_maskz_compress_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_compress_pd - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_maskz_compress_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %__A, <2 x double> zeroinitializer, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_compress_pd(__U,__A); } __m256d test_mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_compress_pd - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_mask_compress_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %__A, <4 x double> %__W, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_compress_pd(__W,__U,__A); } __m256d test_mm256_maskz_compress_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_compress_pd - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_maskz_compress_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %__A, <4 x double> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_compress_pd(__U,__A); } __m128i test_mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_compress_epi64 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_mask_compress_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = 
tail call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %__A, <2 x i64> %__W, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_compress_epi64(__W,__U,__A); } __m128i test_mm_maskz_compress_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_compress_epi64 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_maskz_compress_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %__A, <2 x i64> zeroinitializer, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_compress_epi64(__U,__A); } __m256i test_mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_compress_epi64 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_mask_compress_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %__A, <4 x i64> %__W, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_compress_epi64(__W,__U,__A); } __m256i test_mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_compress_epi64 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_maskz_compress_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %__A, <4 x i64> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_compress_epi64(__U,__A); } __m128 test_mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_compress_ps - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_mask_compress_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %__A, <4 x float> %__W, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x float> %1 return _mm_mask_compress_ps(__W,__U,__A); } __m128 test_mm_maskz_compress_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_compress_ps - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_maskz_compress_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %__A, <4 x float> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_compress_ps(__U,__A); } __m256 test_mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_compress_ps - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_mask_compress_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %__A, <8 x float> %__W, <8 x i1> %0) #9 + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_compress_ps(__W,__U,__A); } __m256 
test_mm256_maskz_compress_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_compress_ps - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_maskz_compress_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %__A, <8 x float> zeroinitializer, <8 x i1> %0) #9 + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_compress_ps(__U,__A); } __m128i test_mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_compress_epi32 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_mask_compress_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i1> %extract.i) #9 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_compress_epi32(__W,__U,__A); } __m128i test_mm_maskz_compress_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_compress_epi32 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm_maskz_compress_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %0, <4 x i32> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_compress_epi32(__U,__A); } __m256i test_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_compress_epi32 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_mask_compress_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %2) #9 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_compress_epi32(__W,__U,__A); } __m256i test_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_compress_epi32 - // CHECK: @llvm.x86.avx512.mask.compress + // X64-LABEL: test_mm256_maskz_compress_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %0, <8 x i32> zeroinitializer, <8 x i1> %1) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_compress_epi32(__U,__A); } void test_mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_compressstoreu_pd - // CHECK: @llvm.masked.compressstore.v2f64(<2 x double> %{{.*}}, double* %{{.*}}, <2 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_compressstoreu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i 
= shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.masked.compressstore.v2f64(<2 x double> %__A, double* %0, <2 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_compressstoreu_pd(__P,__U,__A); } void test_mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_compressstoreu_pd - // CHECK: @llvm.masked.compressstore.v4f64(<4 x double> %{{.*}}, double* %{{.*}}, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_compressstoreu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.compressstore.v4f64(<4 x double> %__A, double* %0, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm256_mask_compressstoreu_pd(__P,__U,__A); } void test_mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_compressstoreu_epi64 - // CHECK: @llvm.masked.compressstore.v2i64(<2 x i64> %{{.*}}, i64* %{{.*}}, <2 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_compressstoreu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.masked.compressstore.v2i64(<2 x i64> %__A, i64* %0, <2 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_compressstoreu_epi64(__P,__U,__A); } void test_mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi64 - // CHECK: @llvm.masked.compressstore.v4i64(<4 x i64> %{{.*}}, i64* %{{.*}}, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_compressstoreu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.compressstore.v4i64(<4 x i64> %__A, i64* %0, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm256_mask_compressstoreu_epi64(__P,__U,__A); } void test_mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_compressstoreu_ps - // CHECK: @llvm.masked.compressstore.v4f32(<4 x float> %{{.*}}, float* %{{.*}}, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_compressstoreu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.compressstore.v4f32(<4 x float> %__A, float* %0, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_compressstoreu_ps(__P,__U,__A); } void test_mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_compressstoreu_ps - // CHECK: @llvm.masked.compressstore.v8f32(<8 x float> %{{.*}}, float* %{{.*}}, <8 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_compressstoreu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.compressstore.v8f32(<8 x float> %__A, float* %0, <8 x i1> %1) #9 + // X64-NEXT: ret void return _mm256_mask_compressstoreu_ps(__P,__U,__A); } void test_mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i 
__A) { - // CHECK-LABEL: @test_mm_mask_compressstoreu_epi32 - // CHECK: @llvm.masked.compressstore.v4i32(<4 x i32> %{{.*}}, i32* %{{.*}}, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_compressstoreu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to i32* + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %0, i32* %1, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_compressstoreu_epi32(__P,__U,__A); } void test_mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi32 - // CHECK: @llvm.masked.compressstore.v8i32(<8 x i32> %{{.*}}, i32* %{{.*}}, <8 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_compressstoreu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to i32* + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %0, i32* %1, <8 x i1> %2) #9 + // X64-NEXT: ret void return _mm256_mask_compressstoreu_epi32(__P,__U,__A); } __m128d test_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi32_pd - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> - // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> - // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // X64-LABEL: test_mm_mask_cvtepi32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sitofp <2 x i32> %shuffle.i.i to <2 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %2 return _mm_mask_cvtepi32_pd(__W,__U,__A); } __m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi32_pd - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> - // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> - // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // X64-LABEL: test_mm_maskz_cvtepi32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sitofp <2 x i32> %shuffle.i.i to <2 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_cvtepi32_pd(__U,__A); } __m256d test_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi32_pd - // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> - // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + // X64-LABEL: test_mm256_mask_cvtepi32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = sitofp <4 x i32> %0 to <4 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // 
X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_cvtepi32_pd(__W,__U,__A); } __m256d test_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi32_pd - // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> - // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = sitofp <4 x i32> %0 to <4 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_cvtepi32_pd(__U,__A); } __m128 test_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi32_ps - // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> - // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // X64-LABEL: test_mm_mask_cvtepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = sitofp <4 x i32> %0 to <4 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %2 return _mm_mask_cvtepi32_ps(__W,__U,__A); } __m128 test_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi32_ps - // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> - // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // X64-LABEL: test_mm_maskz_cvtepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = sitofp <4 x i32> %0 to <4 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_cvtepi32_ps(__U,__A); } __m256 test_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi32_ps - // CHECK: sitofp <8 x i32> %{{.*}} to <8 x float> - // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} + // X64-LABEL: test_mm256_mask_cvtepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %conv.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_cvtepi32_ps(__W,__U,__A); } __m256 test_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi32_ps - // CHECK: sitofp <8 x i32> %{{.*}} to <8 x float> - // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = sitofp <8 x i32> %0 to <8 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> 
%1, <8 x float> %conv.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_cvtepi32_ps(__U,__A); } __m128i test_mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_cvtpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.128 + // X64-LABEL: test_mm_mask_cvtpd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %__A, <4 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtpd_epi32(__W,__U,__A); } __m128i test_mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_cvtpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.128 + // X64-LABEL: test_mm_maskz_cvtpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtpd_epi32(__U,__A); } __m128i test_mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_cvtpd_epi32 - // CHECK: @llvm.x86.avx.cvt.pd2dq.256 - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_cvtpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %__A) #9 + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm256_mask_cvtpd_epi32(__W,__U,__A); } __m128i test_mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtpd_epi32 - // CHECK: @llvm.x86.avx.cvt.pd2dq.256 - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_cvtpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_maskz_cvtpd_epi32(__U,__A); } __m128 test_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_cvtpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps + // X64-LABEL: test_mm_mask_cvtpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %__A, <4 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_cvtpd_ps(__W,__U,__A); } __m128 test_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_cvtpd_ps - // CHECK: @llvm.x86.avx512.mask.cvtpd2ps + // X64-LABEL: test_mm_maskz_cvtpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %__A, <4 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_cvtpd_ps(__U,__A); } __m128 test_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: 
@test_mm256_mask_cvtpd_ps - // CHECK: @llvm.x86.avx.cvt.pd2.ps.256 - // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // X64-LABEL: test_mm256_mask_cvtpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__W + // X64-NEXT: ret <4 x float> %2 return _mm256_mask_cvtpd_ps(__W,__U,__A); } __m128 test_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtpd_ps - // CHECK: @llvm.x86.avx.cvt.pd2.ps.256 - // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // X64-LABEL: test_mm256_maskz_cvtpd_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm256_maskz_cvtpd_ps(__U,__A); } __m128i test_mm_cvtpd_epu32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128 + // X64-LABEL: test_mm_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtpd_epu32(__A); } __m128i test_mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128 + // X64-LABEL: test_mm_mask_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %__A, <4 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtpd_epu32(__W,__U,__A); } __m128i test_mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128 + // X64-LABEL: test_mm_maskz_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtpd_epu32(__U,__A); } __m128i test_mm256_cvtpd_epu32(__m256d __A) { - // CHECK-LABEL: @test_mm256_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.256 + // X64-LABEL: test_mm256_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtpd_epu32(__A); } __m128i test_mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.256 + // X64-LABEL: test_mm256_mask_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %__A, <4 x i32> %0, i8 %__U) #9 + // 
X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtpd_epu32(__W,__U,__A); } __m128i test_mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.256 + // X64-LABEL: test_mm256_maskz_cvtpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtpd_epu32(__U,__A); } __m128i test_mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_cvtps_epi32 - // CHECK: @llvm.x86.sse2.cvtps2dq - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm_mask_cvtps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %__A) #9 + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_cvtps_epi32(__W,__U,__A); } __m128i test_mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_cvtps_epi32 - // CHECK: @llvm.x86.sse2.cvtps2dq - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm_maskz_cvtps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_cvtps_epi32(__U,__A); } __m256i test_mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_cvtps_epi32 - // CHECK: @llvm.x86.avx.cvt.ps2dq.256 - // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_cvtps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %__A) #9 + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_cvtps_epi32(__W,__U,__A); } __m256i test_mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtps_epi32 - // CHECK: @llvm.x86.avx.cvt.ps2dq.256 - // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_cvtps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvtps_epi32(__U,__A); } __m128d test_mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_cvtps_pd - // CHECK: fpext <2 x float> %{{.*}} to <2 
x double> - // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // X64-LABEL: test_mm_mask_cvtps_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <2 x i32> + // X64-NEXT: %conv.i.i = fpext <2 x float> %shuffle.i.i to <2 x double> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_cvtps_pd(__W,__U,__A); } __m128d test_mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_cvtps_pd - // CHECK: fpext <2 x float> %{{.*}} to <2 x double> - // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // X64-LABEL: test_mm_maskz_cvtps_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <2 x i32> + // X64-NEXT: %conv.i.i = fpext <2 x float> %shuffle.i.i to <2 x double> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_cvtps_pd(__U,__A); } __m256d test_mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm256_mask_cvtps_pd - // CHECK: fpext <4 x float> %{{.*}} to <4 x double> - // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + // X64-LABEL: test_mm256_mask_cvtps_pd + // X64: entry: + // X64-NEXT: %conv.i.i = fpext <4 x float> %__A to <4 x double> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_cvtps_pd(__W,__U,__A); } __m256d test_mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtps_pd - // CHECK: fpext <4 x float> %{{.*}} to <4 x double> - // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + // X64-LABEL: test_mm256_maskz_cvtps_pd + // X64: entry: + // X64-NEXT: %conv.i.i = fpext <4 x float> %__A to <4 x double> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_cvtps_pd(__U,__A); } __m128i test_mm_cvtps_epu32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.128 + // X64-LABEL: test_mm_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtps_epu32(__A); } __m128i test_mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.128 + // X64-LABEL: test_mm_mask_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %__A, <4 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = 
bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtps_epu32(__W,__U,__A); } __m128i test_mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.128 + // X64-LABEL: test_mm_maskz_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtps_epu32(__U,__A); } __m256i test_mm256_cvtps_epu32(__m256 __A) { - // CHECK-LABEL: @test_mm256_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.256 + // X64-LABEL: test_mm256_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm256_cvtps_epu32(__A); } __m256i test_mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.256 + // X64-LABEL: test_mm256_mask_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %__A, <8 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvtps_epu32(__W,__U,__A); } __m256i test_mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvtps2udq.256 + // X64-LABEL: test_mm256_maskz_cvtps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_cvtps_epu32(__U,__A); } __m128i test_mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_cvttpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.128 + // X64-LABEL: test_mm_mask_cvttpd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %__A, <4 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvttpd_epi32(__W,__U,__A); } __m128i test_mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_cvttpd_epi32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.128 + // X64-LABEL: test_mm_maskz_cvttpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvttpd_epi32(__U,__A); } __m128i test_mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_cvttpd_epi32 - // CHECK: @llvm.x86.avx.cvtt.pd2dq.256 - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_cvttpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %__A) #9 + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U 
to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm256_mask_cvttpd_epi32(__W,__U,__A); } __m128i test_mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_cvttpd_epi32 - // CHECK: @llvm.x86.avx.cvtt.pd2dq.256 - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_cvttpd_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_maskz_cvttpd_epi32(__U,__A); } __m128i test_mm_cvttpd_epu32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.128 + // X64-LABEL: test_mm_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvttpd_epu32(__A); } __m128i test_mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.128 + // X64-LABEL: test_mm_mask_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %__A, <4 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvttpd_epu32(__W,__U,__A); } __m128i test_mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.128 + // X64-LABEL: test_mm_maskz_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvttpd_epu32(__U,__A); } __m128i test_mm256_cvttpd_epu32(__m256d __A) { - // CHECK-LABEL: @test_mm256_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.256 + // X64-LABEL: test_mm256_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvttpd_epu32(__A); } __m128i test_mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.256 + // X64-LABEL: test_mm256_mask_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %__A, <4 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvttpd_epu32(__W,__U,__A); } __m128i test_mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A) 
{ - // CHECK-LABEL: @test_mm256_maskz_cvttpd_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.256 + // X64-LABEL: test_mm256_maskz_cvttpd_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvttpd_epu32(__U,__A); } __m128i test_mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_cvttps_epi32 - // CHECK: @llvm.x86.sse2.cvttps2dq - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm_mask_cvttps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #9 + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_cvttps_epi32(__W,__U,__A); } __m128i test_mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_cvttps_epi32 - // CHECK: @llvm.x86.sse2.cvttps2dq - // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} + // X64-LABEL: test_mm_maskz_cvttps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_cvttps_epi32(__U,__A); } __m256i test_mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_cvttps_epi32 - // CHECK: @llvm.x86.avx.cvtt.ps2dq.256 - // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_cvttps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #9 + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_cvttps_epi32(__W,__U,__A); } __m256i test_mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_cvttps_epi32 - // CHECK: @llvm.x86.avx.cvtt.ps2dq.256 - // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_cvttps_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvttps_epi32(__U,__A); } __m128i test_mm_cvttps_epu32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.128 + // X64-LABEL: test_mm_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + 
// X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvttps_epu32(__A); } __m128i test_mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.128 + // X64-LABEL: test_mm_mask_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %__A, <4 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvttps_epu32(__W,__U,__A); } __m128i test_mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.128 + // X64-LABEL: test_mm_maskz_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvttps_epu32(__U,__A); } __m256i test_mm256_cvttps_epu32(__m256 __A) { - // CHECK-LABEL: @test_mm256_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.256 + // X64-LABEL: test_mm256_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm256_cvttps_epu32(__A); } __m256i test_mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.256 + // X64-LABEL: test_mm256_mask_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %__A, <8 x i32> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvttps_epu32(__W,__U,__A); } __m256i test_mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_cvttps_epu32 - // CHECK: @llvm.x86.avx512.mask.cvttps2udq.256 + // X64-LABEL: test_mm256_maskz_cvttps_epu32 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <8 x i32> %0 to <4 x i64> + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_cvttps_epu32(__U,__A); } __m128d test_mm_cvtepu32_pd(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepu32_pd - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> - // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> + // X64-LABEL: test_mm_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i = uitofp <2 x i32> %shuffle.i to <2 x double> + // X64-NEXT: ret <2 x double> %conv.i return _mm_cvtepu32_pd(__A); } __m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepu32_pd - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> - // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> - // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // X64-LABEL: test_mm_mask_cvtepu32_pd + // X64: entry: + // 
X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i.i = uitofp <2 x i32> %shuffle.i.i to <2 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %2 return _mm_mask_cvtepu32_pd(__W,__U,__A); } __m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepu32_pd - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> - // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> - // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // X64-LABEL: test_mm_maskz_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i.i = uitofp <2 x i32> %shuffle.i.i to <2 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_cvtepu32_pd(__U,__A); } __m256d test_mm256_cvtepu32_pd(__m128i __A) { - // CHECK-LABEL: @test_mm256_cvtepu32_pd - // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> + // X64-LABEL: test_mm256_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i = uitofp <4 x i32> %0 to <4 x double> + // X64-NEXT: ret <4 x double> %conv.i return _mm256_cvtepu32_pd(__A); } __m256d test_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepu32_pd - // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> - // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + // X64-LABEL: test_mm256_mask_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = uitofp <4 x i32> %0 to <4 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_cvtepu32_pd(__W,__U,__A); } __m256d test_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu32_pd - // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> - // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + // X64-LABEL: test_mm256_maskz_cvtepu32_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = uitofp <4 x i32> %0 to <4 x double> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_cvtepu32_pd(__U,__A); } __m128 test_mm_cvtepu32_ps(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepu32_ps - // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> + // X64-LABEL: test_mm_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i = uitofp <4 x i32> %0 to <4 x 
float> + // X64-NEXT: ret <4 x float> %conv.i return _mm_cvtepu32_ps(__A); } __m128 test_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepu32_ps - // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = uitofp <4 x i32> %0 to <4 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %2 return _mm_mask_cvtepu32_ps(__W,__U,__A); } __m128 test_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepu32_ps - // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i.i = uitofp <4 x i32> %0 to <4 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_cvtepu32_ps(__U,__A); } __m256 test_mm256_cvtepu32_ps(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtepu32_ps - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> + // X64-LABEL: test_mm256_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i = uitofp <8 x i32> %0 to <8 x float> + // X64-NEXT: ret <8 x float> %conv.i return _mm256_cvtepu32_ps(__A); } __m256 test_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepu32_ps - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %conv.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_cvtepu32_ps(__W,__U,__A); } __m256 test_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu32_ps - // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepu32_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i.i = uitofp <8 x i32> %0 to <8 x float> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %conv.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_cvtepu32_ps(__U,__A); } __m128d test_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_div_pd - // CHECK: fdiv <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_div_pd + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <2 x double> %__A, %__B + // X64-NEXT: %0 = 
bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %div.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_div_pd(__W,__U,__A,__B); } __m128d test_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_div_pd - // CHECK: fdiv <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_div_pd + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <2 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %div.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_div_pd(__U,__A,__B); } __m256d test_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_div_pd - // CHECK: fdiv <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_div_pd + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <4 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %div.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_div_pd(__W,__U,__A,__B); } __m256d test_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_div_pd - // CHECK: fdiv <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_div_pd + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <4 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %div.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_div_pd(__U,__A,__B); } __m128 test_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_div_ps - // CHECK: fdiv <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_div_ps + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %div.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_div_ps(__W,__U,__A,__B); } __m128 test_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_div_ps - // CHECK: fdiv <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_div_ps + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %div.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_div_ps(__U,__A,__B); } __m256 
test_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_div_ps - // CHECK: fdiv <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_div_ps + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %div.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_div_ps(__W,__U,__A,__B); } __m256 test_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_div_ps - // CHECK: fdiv <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_div_ps + // X64: entry: + // X64-NEXT: %div.i.i = fdiv <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %div.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_div_ps(__U,__A,__B); } __m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_expand_pd - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm_mask_expand_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %__A, <2 x double> %__W, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x double> %1 return _mm_mask_expand_pd(__W,__U,__A); } __m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_expand_pd - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm_maskz_expand_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %__A, <2 x double> zeroinitializer, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_expand_pd(__U,__A); } __m256d test_mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_expand_pd - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_mask_expand_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %__A, <4 x double> %__W, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_expand_pd(__W,__U,__A); } __m256d test_mm256_maskz_expand_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_expand_pd - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_maskz_expand_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %__A, <4 x double> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_expand_pd(__U,__A); } __m128i test_mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_expand_epi64 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: 
test_mm_mask_expand_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %__A, <2 x i64> %__W, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_expand_epi64(__W,__U,__A); } __m128i test_mm_maskz_expand_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_expand_epi64 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm_maskz_expand_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %__A, <2 x i64> zeroinitializer, <2 x i1> %extract.i) #9 + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_expand_epi64(__U,__A); } __m256i test_mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_expand_epi64 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_mask_expand_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %__A, <4 x i64> %__W, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_expand_epi64(__W,__U,__A); } __m256i test_mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_expand_epi64 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_maskz_expand_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %__A, <4 x i64> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_expand_epi64(__U,__A); } __m128d test_mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_expandloadu_pd - // CHECK: @llvm.masked.expandload.v2f64(double* %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // X64-LABEL: test_mm_mask_expandloadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x double> @llvm.masked.expandload.v2f64(double* %0, <2 x i1> %extract.i, <2 x double> %__W) #9 + // X64-NEXT: ret <2 x double> %2 return _mm_mask_expandloadu_pd(__W,__U,__P); } __m128d test_mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_expandloadu_pd - // CHECK: @llvm.masked.expandload.v2f64(double* %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // X64-LABEL: test_mm_maskz_expandloadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x double> @llvm.masked.expandload.v2f64(double* %0, <2 x i1> %extract.i, <2 x double> zeroinitializer) #9 + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_expandloadu_pd(__U,__P); } __m256d test_mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: 
@test_mm256_mask_expandloadu_pd - // CHECK: @llvm.masked.expandload.v4f64(double* %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}}) + // X64-LABEL: test_mm256_mask_expandloadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x double> @llvm.masked.expandload.v4f64(double* %0, <4 x i1> %extract.i, <4 x double> %__W) #9 + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_expandloadu_pd(__W,__U,__P); } __m256d test_mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_expandloadu_pd - // CHECK: @llvm.masked.expandload.v4f64(double* %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}}) + // X64-LABEL: test_mm256_maskz_expandloadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to double* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x double> @llvm.masked.expandload.v4f64(double* %0, <4 x i1> %extract.i, <4 x double> zeroinitializer) #9 + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_expandloadu_pd(__U,__P); } __m128i test_mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_expandloadu_epi64 - // CHECK: @llvm.masked.expandload.v2i64(i64* %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}}) + // X64-LABEL: test_mm_mask_expandloadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x i64> @llvm.masked.expandload.v2i64(i64* %0, <2 x i1> %extract.i, <2 x i64> %__W) #9 + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_expandloadu_epi64(__W,__U,__P); } __m128i test_mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_expandloadu_epi64 - // CHECK: @llvm.masked.expandload.v2i64(i64* %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}}) + // X64-LABEL: test_mm_maskz_expandloadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x i64> @llvm.masked.expandload.v2i64(i64* %0, <2 x i1> %extract.i, <2 x i64> zeroinitializer) #9 + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_expandloadu_epi64(__U,__P); } __m256i test_mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_expandloadu_epi64 - // CHECK: @llvm.masked.expandload.v4i64(i64* %{{.*}}, <4 x i1> %{{.*}}, <4 x i64> %{{.*}}) + // X64-LABEL: test_mm256_mask_expandloadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i64> @llvm.masked.expandload.v4i64(i64* %0, <4 x i1> %extract.i, <4 x i64> %__W) #9 + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_expandloadu_epi64(__W,__U,__P); } __m256i test_mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi64 - // CHECK: @llvm.masked.expandload.v4i64(i64* %{{.*}}, <4 x i1> %{{.*}}, <4 x i64> %{{.*}}) + // X64-LABEL: 
test_mm256_maskz_expandloadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i64* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i64> @llvm.masked.expandload.v4i64(i64* %0, <4 x i1> %extract.i, <4 x i64> zeroinitializer) #9 + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_expandloadu_epi64(__U,__P); } __m128 test_mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_expandloadu_ps - // CHECK: @llvm.masked.expandload.v4f32(float* %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // X64-LABEL: test_mm_mask_expandloadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x float> @llvm.masked.expandload.v4f32(float* %0, <4 x i1> %extract.i, <4 x float> %__W) #9 + // X64-NEXT: ret <4 x float> %2 return _mm_mask_expandloadu_ps(__W,__U,__P); } __m128 test_mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_expandloadu_ps - // CHECK: @llvm.masked.expandload.v4f32(float* %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // X64-LABEL: test_mm_maskz_expandloadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x float> @llvm.masked.expandload.v4f32(float* %0, <4 x i1> %extract.i, <4 x float> zeroinitializer) #9 + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_expandloadu_ps(__U,__P); } __m256 test_mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_expandloadu_ps - // CHECK: @llvm.masked.expandload.v8f32(float* %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + // X64-LABEL: test_mm256_mask_expandloadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x float> @llvm.masked.expandload.v8f32(float* %0, <8 x i1> %1, <8 x float> %__W) #9 + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_expandloadu_ps(__W,__U,__P); } __m256 test_mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_expandloadu_ps - // CHECK: @llvm.masked.expandload.v8f32(float* %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + // X64-LABEL: test_mm256_maskz_expandloadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to float* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x float> @llvm.masked.expandload.v8f32(float* %0, <8 x i1> %1, <8 x float> zeroinitializer) #9 + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_expandloadu_ps(__U,__P); } __m128i test_mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_expandloadu_epi32 - // CHECK: @llvm.masked.expandload.v4i32(i32* %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}}) + // X64-LABEL: test_mm_mask_expandloadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to i32* + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.masked.expandload.v4i32(i32* %1, <4 
x i1> %extract.i, <4 x i32> %0) #9 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_expandloadu_epi32(__W,__U,__P); } __m128i test_mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_expandloadu_epi32 - // CHECK: @llvm.masked.expandload.v4i32(i32* %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}}) + // X64-LABEL: test_mm_maskz_expandloadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i32* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.masked.expandload.v4i32(i32* %0, <4 x i1> %extract.i, <4 x i32> zeroinitializer) #9 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_expandloadu_epi32(__U,__P); } __m256i test_mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_expandloadu_epi32 - // CHECK: @llvm.masked.expandload.v8i32(i32* %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}}) + // X64-LABEL: test_mm256_mask_expandloadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %1 = bitcast i8* %__P to i32* + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.masked.expandload.v8i32(i32* %1, <8 x i1> %2, <8 x i32> %0) #9 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_expandloadu_epi32(__W,__U,__P); } __m256i test_mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi32 - // CHECK: @llvm.masked.expandload.v8i32(i32* %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}}) + // X64-LABEL: test_mm256_maskz_expandloadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to i32* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.masked.expandload.v8i32(i32* %0, <8 x i1> %1, <8 x i32> zeroinitializer) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_expandloadu_epi32(__U,__P); } __m128 test_mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_expand_ps - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm_mask_expand_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %__A, <4 x float> %__W, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x float> %1 return _mm_mask_expand_ps(__W,__U,__A); } __m128 test_mm_maskz_expand_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_expand_ps - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm_maskz_expand_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %__A, <4 x float> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_expand_ps(__U,__A); } __m256 test_mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_expand_ps - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_mask_expand_ps + // X64: entry: + // 
X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %__A, <8 x float> %__W, <8 x i1> %0) #9 + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_expand_ps(__W,__U,__A); } __m256 test_mm256_maskz_expand_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_expand_ps - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_maskz_expand_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %__A, <8 x float> zeroinitializer, <8 x i1> %0) #9 + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_expand_ps(__U,__A); } __m128i test_mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_expand_epi32 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm_mask_expand_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i1> %extract.i) #9 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_expand_epi32(__W,__U,__A); } __m128i test_mm_maskz_expand_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_expand_epi32 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm_maskz_expand_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %0, <4 x i32> zeroinitializer, <4 x i1> %extract.i) #9 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_expand_epi32(__U,__A); } __m256i test_mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_expand_epi32 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_mask_expand_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %2) #9 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_expand_epi32(__W,__U,__A); } __m256i test_mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_expand_epi32 - // CHECK: @llvm.x86.avx512.mask.expand + // X64-LABEL: test_mm256_maskz_expand_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %0, <8 x i32> zeroinitializer, <8 x i1> %1) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_expand_epi32(__U,__A); } __m128d test_mm_getexp_pd(__m128d __A) { - // CHECK-LABEL: @test_mm_getexp_pd - // CHECK: @llvm.x86.avx512.mask.getexp.pd.128 + // X64-LABEL: test_mm_getexp_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x 
double> @llvm.x86.avx512.mask.getexp.pd.128(<2 x double> %__A, <2 x double> zeroinitializer, i8 -1) #9
+ // X64-NEXT: ret <2 x double> %0
 return _mm_getexp_pd(__A);
 }
 __m128d test_mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A) {
- // CHECK-LABEL: @test_mm_mask_getexp_pd
- // CHECK: @llvm.x86.avx512.mask.getexp.pd.128
+ // X64-LABEL: test_mm_mask_getexp_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.pd.128(<2 x double> %__A, <2 x double> %__W, i8 %__U) #9
+ // X64-NEXT: ret <2 x double> %0
 return _mm_mask_getexp_pd(__W,__U,__A);
 }
 __m128d test_mm_maskz_getexp_pd(__mmask8 __U, __m128d __A) {
- // CHECK-LABEL: @test_mm_maskz_getexp_pd
- // CHECK: @llvm.x86.avx512.mask.getexp.pd.128
+ // X64-LABEL: test_mm_maskz_getexp_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getexp.pd.128(<2 x double> %__A, <2 x double> zeroinitializer, i8 %__U) #9
+ // X64-NEXT: ret <2 x double> %0
 return _mm_maskz_getexp_pd(__U,__A);
 }
 __m256d test_mm256_getexp_pd(__m256d __A) {
- // CHECK-LABEL: @test_mm256_getexp_pd
- // CHECK: @llvm.x86.avx512.mask.getexp.pd.256
+ // X64-LABEL: test_mm256_getexp_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %__A, <4 x double> zeroinitializer, i8 -1) #9
+ // X64-NEXT: ret <4 x double> %0
 return _mm256_getexp_pd(__A);
 }
 __m256d test_mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A) {
- // CHECK-LABEL: @test_mm256_mask_getexp_pd
- // CHECK: @llvm.x86.avx512.mask.getexp.pd.256
+ // X64-LABEL: test_mm256_mask_getexp_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %__A, <4 x double> %__W, i8 %__U) #9
+ // X64-NEXT: ret <4 x double> %0
 return _mm256_mask_getexp_pd(__W,__U,__A);
 }
 __m256d test_mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A) {
- // CHECK-LABEL: @test_mm256_maskz_getexp_pd
- // CHECK: @llvm.x86.avx512.mask.getexp.pd.256
+ // X64-LABEL: test_mm256_maskz_getexp_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %__A, <4 x double> zeroinitializer, i8 %__U) #9
+ // X64-NEXT: ret <4 x double> %0
 return _mm256_maskz_getexp_pd(__U,__A);
 }
 __m128 test_mm_getexp_ps(__m128 __A) {
- // CHECK-LABEL: @test_mm_getexp_ps
- // CHECK: @llvm.x86.avx512.mask.getexp.ps.128
+ // X64-LABEL: test_mm_getexp_ps
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ps.128(<4 x float> %__A, <4 x float> zeroinitializer, i8 -1) #9
+ // X64-NEXT: ret <4 x float> %0
 return _mm_getexp_ps(__A);
 }
 __m128 test_mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A) {
- // CHECK-LABEL: @test_mm_mask_getexp_ps
- // CHECK: @llvm.x86.avx512.mask.getexp.ps.128
+ // X64-LABEL: test_mm_mask_getexp_ps
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ps.128(<4 x float> %__A, <4 x float> %__W, i8 %__U) #9
+ // X64-NEXT: ret <4 x float> %0
 return _mm_mask_getexp_ps(__W,__U,__A);
 }
 __m128 test_mm_maskz_getexp_ps(__mmask8 __U, __m128 __A) {
- // CHECK-LABEL: @test_mm_maskz_getexp_ps
- // CHECK: @llvm.x86.avx512.mask.getexp.ps.128
+ // X64-LABEL: test_mm_maskz_getexp_ps
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getexp.ps.128(<4 x float> %__A, <4 x float> zeroinitializer, i8 %__U) #9
+ // X64-NEXT: ret <4 x float> %0
 return _mm_maskz_getexp_ps(__U,__A);
 }
 __m256 test_mm256_getexp_ps(__m256 __A) {
- // CHECK-LABEL:
@test_mm256_getexp_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.256 + // X64-LABEL: test_mm256_getexp_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %__A, <8 x float> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_getexp_ps(__A); } __m256 test_mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_getexp_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.256 + // X64-LABEL: test_mm256_mask_getexp_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %__A, <8 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_mask_getexp_ps(__W,__U,__A); } __m256 test_mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_getexp_ps - // CHECK: @llvm.x86.avx512.mask.getexp.ps.256 + // X64-LABEL: test_mm256_maskz_getexp_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %__A, <8 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_maskz_getexp_ps(__U,__A); } __m128d test_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_max_pd - // CHECK: @llvm.x86.sse2.max.pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_max_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %__A, <2 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__W + // X64-NEXT: ret <2 x double> %2 return _mm_mask_max_pd(__W,__U,__A,__B); } __m128d test_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_max_pd - // CHECK: @llvm.x86.sse2.max.pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_max_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %__A, <2 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_max_pd(__U,__A,__B); } __m256d test_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_max_pd - // CHECK: @llvm.x86.avx.max.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_max_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %__A, <4 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_max_pd(__W,__U,__A,__B); } __m256d test_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_max_pd - // CHECK: @llvm.x86.avx.max.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_max_pd + // X64: entry: + // 
X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %__A, <4 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_max_pd(__U,__A,__B); } __m128 test_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_max_ps - // CHECK: @llvm.x86.sse.max.ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_max_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %__A, <4 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__W + // X64-NEXT: ret <4 x float> %2 return _mm_mask_max_ps(__W,__U,__A,__B); } __m128 test_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_max_ps - // CHECK: @llvm.x86.sse.max.ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_max_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %__A, <4 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_max_ps(__U,__A,__B); } __m256 test_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_max_ps - // CHECK: @llvm.x86.avx.max.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_max_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %__A, <8 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__W + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_max_ps(__W,__U,__A,__B); } __m256 test_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_max_ps - // CHECK: @llvm.x86.avx.max.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_max_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %__A, <8 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_max_ps(__U,__A,__B); } __m128d test_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_min_pd - // CHECK: @llvm.x86.sse2.min.pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_min_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %__A, <2 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> 
%__W + // X64-NEXT: ret <2 x double> %2 return _mm_mask_min_pd(__W,__U,__A,__B); } __m128d test_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_min_pd - // CHECK: @llvm.x86.sse2.min.pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_min_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %__A, <2 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_min_pd(__U,__A,__B); } __m256d test_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_min_pd - // CHECK: @llvm.x86.avx.min.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_min_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %__A, <4 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_min_pd(__W,__U,__A,__B); } __m256d test_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_min_pd - // CHECK: @llvm.x86.avx.min.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_min_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %__A, <4 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_min_pd(__U,__A,__B); } __m128 test_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_min_ps - // CHECK: @llvm.x86.sse.min.ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_min_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %__A, <4 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__W + // X64-NEXT: ret <4 x float> %2 return _mm_mask_min_ps(__W,__U,__A,__B); } __m128 test_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_min_ps - // CHECK: @llvm.x86.sse.min.ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_min_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %__A, <4 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_min_ps(__U,__A,__B); } __m256 test_mm256_mask_min_ps(__m256 __W, 
__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_min_ps - // CHECK: @llvm.x86.avx.min.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_min_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %__A, <8 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__W + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_min_ps(__W,__U,__A,__B); } __m256 test_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_min_ps - // CHECK: @llvm.x86.avx.min.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_min_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %__A, <8 x float> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_min_ps(__U,__A,__B); } __m128d test_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_mul_pd - // CHECK: fmul <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_mul_pd + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <2 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %mul.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_mul_pd(__W,__U,__A,__B); } __m128d test_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_mul_pd - // CHECK: fmul <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_mul_pd + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <2 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %mul.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_mul_pd(__U,__A,__B); } __m256d test_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_mul_pd - // CHECK: fmul <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_mul_pd + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <4 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %mul.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_mul_pd(__W,__U,__A,__B); } __m256d test_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_mul_pd - // CHECK: fmul <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_mul_pd + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <4 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: 
%extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %mul.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_mul_pd(__U,__A,__B); } __m128 test_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_mul_ps - // CHECK: fmul <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_mul_ps + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %mul.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_mul_ps(__W,__U,__A,__B); } __m128 test_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_mul_ps - // CHECK: fmul <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_mul_ps + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %mul.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_mul_ps(__U,__A,__B); } __m256 test_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_mul_ps - // CHECK: fmul <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_mul_ps + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %mul.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_mul_ps(__W,__U,__A,__B); } __m256 test_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_mul_ps - // CHECK: fmul <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_mul_ps + // X64: entry: + // X64-NEXT: %mul.i.i = fmul <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %mul.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_mul_ps(__U,__A,__B); } __m128i test_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_abs_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = sub <4 x i32> zeroinitializer, %0 + // X64-NEXT: %2 = icmp slt <4 x i32> %0, zeroinitializer + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %0 + // X64-NEXT: %4 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <4 x i32> + // 
X64-NEXT: %6 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %4 + // X64-NEXT: %7 = bitcast <4 x i32> %6 to <2 x i64> + // X64-NEXT: ret <2 x i64> %7 return _mm_mask_abs_epi32(__W,__U,__A); } __m128i test_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_abs_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = sub <4 x i32> zeroinitializer, %0 + // X64-NEXT: %2 = icmp slt <4 x i32> %0, zeroinitializer + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %0 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_abs_epi32(__U,__A); } __m256i test_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_abs_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = sub <8 x i32> zeroinitializer, %0 + // X64-NEXT: %2 = icmp slt <8 x i32> %0, zeroinitializer + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %0 + // X64-NEXT: %4 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 + // X64-NEXT: %7 = bitcast <8 x i32> %6 to <4 x i64> + // X64-NEXT: ret <4 x i64> %7 return _mm256_mask_abs_epi32(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_abs_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = sub <8 x i32> zeroinitializer, %0 + // X64-NEXT: %2 = icmp slt <8 x i32> %0, zeroinitializer + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %0 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_abs_epi32(__U,__A); } __m128i test_mm_abs_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] + // X64-LABEL: test_mm_abs_epi64 + // X64: entry: + // X64-NEXT: 
%0 = sub <2 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <2 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <2 x i1> %1, <2 x i64> %0, <2 x i64> %__A + // X64-NEXT: ret <2 x i64> %2 return _mm_abs_epi64(__A); } __m128i test_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[SEL]], <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_abs_epi64 + // X64: entry: + // X64-NEXT: %0 = sub <2 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <2 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <2 x i1> %1, <2 x i64> %0, <2 x i64> %__A + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> + // X64-NEXT: %4 = select <2 x i1> %extract.i, <2 x i64> %2, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_abs_epi64(__W,__U,__A); } __m128i test_mm_maskz_abs_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[SEL]], <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_abs_epi64 + // X64: entry: + // X64-NEXT: %0 = sub <2 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <2 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <2 x i1> %1, <2 x i64> %0, <2 x i64> %__A + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> + // X64-NEXT: %4 = select <2 x i1> %extract.i, <2 x i64> %2, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_abs_epi64(__U,__A); } __m256i test_mm256_abs_epi64(__m256i __A) { - // CHECK-LABEL: @test_mm256_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] + // X64-LABEL: test_mm256_abs_epi64 + // X64: entry: + // X64-NEXT: %0 = sub <4 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <4 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <4 x i1> %1, <4 x i64> %0, <4 x i64> %__A + // X64-NEXT: ret <4 x i64> %2 return _mm256_abs_epi64(__A); } __m256i test_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[SEL]], <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_abs_epi64 + // X64: entry: + // X64-NEXT: %0 = sub <4 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <4 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <4 x i1> %1, <4 x i64> %0, <4 x i64> %__A + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i64> %2, <4 x i64> 
%__W + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_abs_epi64(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[SEL]], <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_abs_epi64 + // X64: entry: + // X64-NEXT: %0 = sub <4 x i64> zeroinitializer, %__A + // X64-NEXT: %1 = icmp slt <4 x i64> %__A, zeroinitializer + // X64-NEXT: %2 = select <4 x i1> %1, <4 x i64> %0, <4 x i64> %__A + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i64> %2, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_abs_epi64(__U,__A); } __m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_maskz_max_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = icmp sgt <4 x i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_max_epi32(__M,__A,__B); } __m128i test_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_mask_max_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = icmp sgt <4 x i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <4 x i32> + // X64-NEXT: %6 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %4 + // X64-NEXT: %7 = bitcast <4 x i32> %6 to <2 x i64> + // X64-NEXT: ret <2 x i64> %7 return _mm_mask_max_epi32(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_max_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // 
X64-NEXT: %2 = icmp sgt <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_max_epi32(__M,__A,__B); } __m256i test_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epi32 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_max_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = icmp sgt <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 + // X64-NEXT: %7 = bitcast <8 x i32> %6 to <4 x i64> + // X64-NEXT: ret <4 x i64> %7 return _mm256_mask_max_epi32(__W,__M,__A,__B); } __m128i test_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_maskz_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_max_epi64(__M,__A,__B); } __m128i test_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_mask_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_max_epi64(__W,__M,__A,__B); } __m128i test_mm_max_epi64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // X64-LABEL: test_mm_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: ret <2 x i64> %1 return _mm_max_epi64(__A,__B); } __m256i test_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epi64 
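// --- Illustrative sketch, not part of the checked-in test ----------------
// The X64 check lines in this hunk all assert the same lowering shape for the
// masked integer max/min intrinsics: icmp + select computes the per-lane
// max/min, the i8 mask is bitcast to <8 x i1> (narrower vectors also extract
// the low lanes via shufflevector), and a final select yields either the
// computed lane, zero (the maskz_ forms) or the passthrough lane (the mask_
// forms). A scalar model of one lane, using hypothetical helper names
// (m/w/a/b mirror the __M/__W/__A/__B arguments above):
static inline long long model_maskz_max_epi64_lane(unsigned char m, int lane,
                                                   long long a, long long b) {
  long long mx = a > b ? a : b;          // icmp sgt + select
  return ((m >> lane) & 1) ? mx : 0LL;   // mask bit set -> result, else zeroinitializer
}
static inline long long model_mask_max_epi64_lane(unsigned char m, int lane,
                                                  long long w, long long a,
                                                  long long b) {
  long long mx = a > b ? a : b;          // icmp sgt + select
  return ((m >> lane) & 1) ? mx : w;     // mask bit set -> result, else passthrough
}
// --------------------------------------------------------------------------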
- // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + // X64-LABEL: test_mm256_maskz_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_max_epi64(__M,__A,__B); } __m256i test_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + // X64-LABEL: test_mm256_mask_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %3 return _mm256_mask_max_epi64(__W,__M,__A,__B); } __m256i test_mm256_max_epi64(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_max_epi64 - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // X64-LABEL: test_mm256_max_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp sgt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: ret <4 x i64> %1 return _mm256_max_epi64(__A,__B); } __m128i test_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_maskz_max_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = icmp ugt <4 x i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_max_epu32(__M,__A,__B); } __m128i test_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_mask_max_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = icmp ugt <4 x 
i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <4 x i32> + // X64-NEXT: %6 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %4 + // X64-NEXT: %7 = bitcast <4 x i32> %6 to <2 x i64> + // X64-NEXT: ret <2 x i64> %7 return _mm_mask_max_epu32(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_max_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = icmp ugt <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_max_epu32(__M,__A,__B); } __m256i test_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epu32 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_max_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = icmp ugt <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 + // X64-NEXT: %7 = bitcast <8 x i32> %6 to <4 x i64> + // X64-NEXT: ret <4 x i64> %7 return _mm256_mask_max_epu32(__W,__M,__A,__B); } __m128i test_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_maskz_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_max_epu64(__M,__A,__B); } __m128i test_mm_max_epu64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // X64-LABEL: test_mm_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // 
X64-NEXT: ret <2 x i64> %1 return _mm_max_epu64(__A,__B); } __m128i test_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_mask_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_max_epu64(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + // X64-LABEL: test_mm256_maskz_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_max_epu64(__M,__A,__B); } __m256i test_mm256_max_epu64(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // X64-LABEL: test_mm256_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: ret <4 x i64> %1 return _mm256_max_epu64(__A,__B); } __m256i test_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epu64 - // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + // X64-LABEL: test_mm256_mask_max_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ugt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %3 return _mm256_mask_max_epu64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_maskz_min_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: 
%2 = icmp slt <4 x i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_min_epi32(__M,__A,__B); } __m128i test_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_mask_min_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = icmp slt <4 x i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <4 x i32> + // X64-NEXT: %6 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %4 + // X64-NEXT: %7 = bitcast <4 x i32> %6 to <2 x i64> + // X64-NEXT: ret <2 x i64> %7 return _mm_mask_min_epi32(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_min_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = icmp slt <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_min_epi32(__M,__A,__B); } __m256i test_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epi32 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_min_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = icmp slt <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 + // X64-NEXT: %7 = bitcast <8 x i32> %6 to <4 x i64> + // X64-NEXT: ret <4 x i64> %7 return _mm256_mask_min_epi32(__W,__M,__A,__B); } __m128i test_mm_min_epi64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x 
i64> [[Y]] + // X64-LABEL: test_mm_min_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp slt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: ret <2 x i64> %1 return _mm_min_epi64(__A,__B); } __m128i test_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_mask_min_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp slt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_min_epi64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_maskz_min_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp slt <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_min_epi64(__M,__A,__B); } __m256i test_mm256_min_epi64(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // X64-LABEL: test_mm256_min_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp slt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: ret <4 x i64> %1 return _mm256_min_epi64(__A,__B); } __m256i test_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + // X64-LABEL: test_mm256_mask_min_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp slt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %3 return _mm256_mask_min_epi64(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epi64 - // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + 
// X64-LABEL: test_mm256_maskz_min_epi64 + // X64: entry: + // X64-NEXT: %0 = icmp slt <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_min_epi64(__M,__A,__B); } __m128i test_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_maskz_min_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = icmp ult <4 x i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_min_epu32(__M,__A,__B); } __m128i test_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} + // X64-LABEL: test_mm_mask_min_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = icmp ult <4 x i32> %0, %1 + // X64-NEXT: %3 = select <4 x i1> %2, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %5, <8 x i1> undef, <4 x i32> + // X64-NEXT: %6 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %4 + // X64-NEXT: %7 = bitcast <4 x i32> %6 to <2 x i64> + // X64-NEXT: ret <2 x i64> %7 return _mm_mask_min_epu32(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_maskz_min_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = icmp ult <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_min_epu32(__M,__A,__B); } __m256i test_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epu32 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> 
[[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} + // X64-LABEL: test_mm256_mask_min_epu32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = icmp ult <8 x i32> %0, %1 + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 + // X64-NEXT: %7 = bitcast <8 x i32> %6 to <4 x i64> + // X64-NEXT: ret <4 x i64> %7 return _mm256_mask_min_epu32(__W,__M,__A,__B); } __m128i test_mm_min_epu64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // X64-LABEL: test_mm_min_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ult <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: ret <2 x i64> %1 return _mm_min_epu64(__A,__B); } __m128i test_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_mask_min_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ult <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_min_epu64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] - // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} + // X64-LABEL: test_mm_maskz_min_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ult <2 x i64> %__A, %__B + // X64-NEXT: %1 = select <2 x i1> %0, <2 x i64> %__A, <2 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x i64> %1, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_min_epu64(__M,__A,__B); } __m256i test_mm256_min_epu64(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // X64-LABEL: test_mm256_min_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ult <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: ret <4 x i64> %1 return _mm256_min_epu64(__A,__B); } __m256i test_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <4 x 
i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + // X64-LABEL: test_mm256_mask_min_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ult <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %3 return _mm256_mask_min_epu64(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epu64 - // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] - // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} + // X64-LABEL: test_mm256_maskz_min_epu64 + // X64: entry: + // X64-NEXT: %0 = icmp ult <4 x i64> %__A, %__B + // X64-NEXT: %1 = select <4 x i1> %0, <4 x i64> %__A, <4 x i64> %__B + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i64> %1, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_min_epu64(__M,__A,__B); } __m128d test_mm_roundscale_pd(__m128d __A) { - // CHECK-LABEL: @test_mm_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.128 + // X64-LABEL: test_mm_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %__A, i32 4, <2 x double> zeroinitializer, i8 -1) + // X64-NEXT: ret <2 x double> %0 return _mm_roundscale_pd(__A,4); } __m128d test_mm_mask_roundscale_pd(__m128d __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.128 + // X64-LABEL: test_mm_mask_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %__A, i32 4, <2 x double> %__W, i8 %__U) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_roundscale_pd(__W,__U,__A,4); } __m128d test_mm_maskz_roundscale_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.128 + // X64-LABEL: test_mm_maskz_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %__A, i32 4, <2 x double> zeroinitializer, i8 %__U) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_roundscale_pd(__U,__A,4); } __m256d test_mm256_roundscale_pd(__m256d __A) { - // CHECK-LABEL: @test_mm256_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.256 + // X64-LABEL: test_mm256_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %__A, i32 4, <4 x double> zeroinitializer, i8 -1) + // X64-NEXT: ret <4 x double> %0 return _mm256_roundscale_pd(__A,4); } __m256d test_mm256_mask_roundscale_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.256 + // X64-LABEL: test_mm256_mask_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %__A, i32 4, <4 
x double> %__W, i8 %__U) + // X64-NEXT: ret <4 x double> %0 return _mm256_mask_roundscale_pd(__W,__U,__A,4); } __m256d test_mm256_maskz_roundscale_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_roundscale_pd - // CHECK: @llvm.x86.avx512.mask.rndscale.pd.256 + // X64-LABEL: test_mm256_maskz_roundscale_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %__A, i32 4, <4 x double> zeroinitializer, i8 %__U) + // X64-NEXT: ret <4 x double> %0 return _mm256_maskz_roundscale_pd(__U,__A,4); } __m128 test_mm_roundscale_ps(__m128 __A) { - // CHECK-LABEL: @test_mm_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.128 + // X64-LABEL: test_mm_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %__A, i32 4, <4 x float> zeroinitializer, i8 -1) + // X64-NEXT: ret <4 x float> %0 return _mm_roundscale_ps(__A,4); } __m128 test_mm_mask_roundscale_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.128 + // X64-LABEL: test_mm_mask_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %__A, i32 4, <4 x float> %__W, i8 %__U) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_roundscale_ps(__W,__U,__A,4); } __m128 test_mm_maskz_roundscale_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.128 + // X64-LABEL: test_mm_maskz_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %__A, i32 4, <4 x float> zeroinitializer, i8 %__U) + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_roundscale_ps(__U,__A, 4); } __m256 test_mm256_roundscale_ps(__m256 __A) { - // CHECK-LABEL: @test_mm256_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.256 + // X64-LABEL: test_mm256_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %__A, i32 4, <8 x float> zeroinitializer, i8 -1) + // X64-NEXT: ret <8 x float> %0 return _mm256_roundscale_ps(__A,4); } __m256 test_mm256_mask_roundscale_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.256 + // X64-LABEL: test_mm256_mask_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %__A, i32 4, <8 x float> %__W, i8 %__U) + // X64-NEXT: ret <8 x float> %0 return _mm256_mask_roundscale_ps(__W,__U,__A,4); } __m256 test_mm256_maskz_roundscale_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_roundscale_ps - // CHECK: @llvm.x86.avx512.mask.rndscale.ps.256 + // X64-LABEL: test_mm256_maskz_roundscale_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %__A, i32 4, <8 x float> zeroinitializer, i8 %__U) + // X64-NEXT: ret <8 x float> %0 return _mm256_maskz_roundscale_ps(__U,__A,4); } __m128d test_mm_scalef_pd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.128 + // X64-LABEL: test_mm_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <2 x 
double> %0 return _mm_scalef_pd(__A,__B); } __m128d test_mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.128 + // X64-LABEL: test_mm_mask_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_scalef_pd(__W,__U,__A,__B); } __m128d test_mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.128 + // X64-LABEL: test_mm_maskz_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_scalef_pd(__U,__A,__B); } __m256d test_mm256_scalef_pd(__m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.256 + // X64-LABEL: test_mm256_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %__A, <4 x double> %__B, <4 x double> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_scalef_pd(__A,__B); } __m256d test_mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.256 + // X64-LABEL: test_mm256_mask_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %__A, <4 x double> %__B, <4 x double> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_mask_scalef_pd(__W,__U,__A,__B); } __m256d test_mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_scalef_pd - // CHECK: @llvm.x86.avx512.mask.scalef.pd.256 + // X64-LABEL: test_mm256_maskz_scalef_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %__A, <4 x double> %__B, <4 x double> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_maskz_scalef_pd(__U,__A,__B); } __m128 test_mm_scalef_ps(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.128 + // X64-LABEL: test_mm_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_scalef_ps(__A,__B); } __m128 test_mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.128 + // X64-LABEL: test_mm_mask_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_scalef_ps(__W,__U,__A,__B); } __m128 test_mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.128 + // X64-LABEL: test_mm_maskz_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %__A, <4 x float> %__B, <4 x float> 
zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_scalef_ps(__U,__A,__B); } __m256 test_mm256_scalef_ps(__m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.256 + // X64-LABEL: test_mm256_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %__A, <8 x float> %__B, <8 x float> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_scalef_ps(__A,__B); } __m256 test_mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.256 + // X64-LABEL: test_mm256_mask_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %__A, <8 x float> %__B, <8 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_mask_scalef_ps(__W,__U,__A,__B); } __m256 test_mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_scalef_ps - // CHECK: @llvm.x86.avx512.mask.scalef.ps.256 + // X64-LABEL: test_mm256_maskz_scalef_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %__A, <8 x float> %__B, <8 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_maskz_scalef_ps(__U,__A,__B); } void test_mm_i64scatter_pd(double *__addr, __m128i __index, __m128d __v1) { - // CHECK-LABEL: @test_mm_i64scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatterdiv2.df + // X64-LABEL: test_mm_i64scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %0, <2 x i1> , <2 x i64> %__index, <2 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm_i64scatter_pd(__addr,__index,__v1,2); } void test_mm_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) { - // CHECK-LABEL: @test_mm_mask_i64scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatterdiv2.df + // X64-LABEL: test_mm_mask_i64scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %0, <2 x i1> %extract, <2 x i64> %__index, <2 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); } void test_mm_i64scatter_epi64(long long *__addr, __m128i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm_i64scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatterdiv2.di + // X64-LABEL: test_mm_i64scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %0, <2 x i1> , <2 x i64> %__index, <2 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm_i64scatter_epi64(__addr,__index,__v1,2); } void test_mm_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm_mask_i64scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatterdiv2.di + // X64-LABEL: test_mm_mask_i64scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call 
void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %0, <2 x i1> %extract, <2 x i64> %__index, <2 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); } void test_mm256_i64scatter_pd(double *__addr, __m256i __index, __m256d __v1) { - // CHECK-LABEL: @test_mm256_i64scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.df + // X64-LABEL: test_mm256_i64scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %0, <4 x i1> , <4 x i64> %__index, <4 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_i64scatter_pd(__addr,__index,__v1,2); } void test_mm256_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m256i __index, __m256d __v1) { - // CHECK-LABEL: @test_mm256_mask_i64scatter_pd - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.df + // X64-LABEL: test_mm256_mask_i64scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %0, <4 x i1> %extract, <4 x i64> %__index, <4 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); } void test_mm256_i64scatter_epi64(long long *__addr, __m256i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm256_i64scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.di + // X64-LABEL: test_mm256_i64scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %0, <4 x i1> , <4 x i64> %__index, <4 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_i64scatter_epi64(__addr,__index,__v1,2); } void test_mm256_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask, __m256i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm256_mask_i64scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.di + // X64-LABEL: test_mm256_mask_i64scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %0, <4 x i1> %extract, <4 x i64> %__index, <4 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); } void test_mm_i64scatter_ps(float *__addr, __m128i __index, __m128 __v1) { - // CHECK-LABEL: @test_mm_i64scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.sf + // X64-LABEL: test_mm_i64scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__addr to i8* + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %0, <2 x i1> , <2 x i64> %__index, <4 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm_i64scatter_ps(__addr,__index,__v1,2); } void test_mm_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) { - // CHECK-LABEL: @test_mm_mask_i64scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.sf + // X64-LABEL: test_mm_mask_i64scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__addr to i8* + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %0, <2 x i1> %extract, 
<2 x i64> %__index, <4 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); } void test_mm_i64scatter_epi32(int *__addr, __m128i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm_i64scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.si + // X64-LABEL: test_mm_i64scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i32* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__v1 to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %0, <2 x i1> , <2 x i64> %__index, <4 x i32> %1, i32 2) + // X64-NEXT: ret void return _mm_i64scatter_epi32(__addr,__index,__v1,2); } void test_mm_mask_i64scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm_mask_i64scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatterdiv4.si + // X64-LABEL: test_mm_mask_i64scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i32* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__v1 to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %0, <2 x i1> %extract, <2 x i64> %__index, <4 x i32> %1, i32 2) + // X64-NEXT: ret void return _mm_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); } void test_mm256_i64scatter_ps(float *__addr, __m256i __index, __m128 __v1) { - // CHECK-LABEL: @test_mm256_i64scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatterdiv8.sf + // X64-LABEL: test_mm256_i64scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__addr to i8* + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %0, <4 x i1> , <4 x i64> %__index, <4 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_i64scatter_ps(__addr,__index,__v1,2); } void test_mm256_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m128 __v1) { - // CHECK-LABEL: @test_mm256_mask_i64scatter_ps - // CHECK: @llvm.x86.avx512.mask.scatterdiv8.sf + // X64-LABEL: test_mm256_mask_i64scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__addr to i8* + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %0, <4 x i1> %extract, <4 x i64> %__index, <4 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); } void test_mm256_i64scatter_epi32(int *__addr, __m256i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm256_i64scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatterdiv8.si + // X64-LABEL: test_mm256_i64scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i32* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__v1 to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %0, <4 x i1> , <4 x i64> %__index, <4 x i32> %1, i32 2) + // X64-NEXT: ret void return _mm256_i64scatter_epi32(__addr,__index,__v1,2); } void test_mm256_mask_i64scatter_epi32(int *__addr, __mmask8 __mask, __m256i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm256_mask_i64scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scatterdiv8.si + // X64-LABEL: test_mm256_mask_i64scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i32* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__v1 to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = 
shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %0, <4 x i1> %extract, <4 x i64> %__index, <4 x i32> %1, i32 2) + // X64-NEXT: ret void return _mm256_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); } void test_mm_i32scatter_pd(double *__addr, __m128i __index, __m128d __v1) { - // CHECK-LABEL: @test_mm_i32scatter_pd - // CHECK: @llvm.x86.avx512.mask.scattersiv2.df + // X64-LABEL: test_mm_i32scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %0, <2 x i1> , <4 x i32> %1, <2 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm_i32scatter_pd(__addr,__index,__v1,2); } void test_mm_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) { - // CHECK-LABEL: @test_mm_mask_i32scatter_pd - // CHECK: @llvm.x86.avx512.mask.scattersiv2.df + // X64-LABEL: test_mm_mask_i32scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %0, <2 x i1> %extract, <4 x i32> %1, <2 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); } void test_mm_i32scatter_epi64(long long *__addr, __m128i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm_i32scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scattersiv2.di + // X64-LABEL: test_mm_i32scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %0, <2 x i1> , <4 x i32> %1, <2 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm_i32scatter_epi64(__addr,__index,__v1,2); } void test_mm_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm_mask_i32scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scattersiv2.di + // X64-LABEL: test_mm_mask_i32scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %0, <2 x i1> %extract, <4 x i32> %1, <2 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); } void test_mm256_i32scatter_pd(double *__addr, __m128i __index, __m256d __v1) { - // CHECK-LABEL: @test_mm256_i32scatter_pd - // CHECK: @llvm.x86.avx512.mask.scattersiv4.df + // X64-LABEL: test_mm256_i32scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %0, <4 x i1> , <4 x i32> %1, <4 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_i32scatter_pd(__addr,__index,__v1,2); } void test_mm256_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m256d __v1) { - // CHECK-LABEL: @test_mm256_mask_i32scatter_pd - // CHECK: @llvm.x86.avx512.mask.scattersiv4.df + // 
X64-LABEL: test_mm256_mask_i32scatter_pd + // X64: entry: + // X64-NEXT: %0 = bitcast double* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %0, <4 x i1> %extract, <4 x i32> %1, <4 x double> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); } void test_mm256_i32scatter_epi64(long long *__addr, __m128i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm256_i32scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scattersiv4.di + // X64-LABEL: test_mm256_i32scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %0, <4 x i1> , <4 x i32> %1, <4 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_i32scatter_epi64(__addr,__index,__v1,2); } void test_mm256_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm256_mask_i32scatter_epi64 - // CHECK: @llvm.x86.avx512.mask.scattersiv4.di + // X64-LABEL: test_mm256_mask_i32scatter_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i64* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %0, <4 x i1> %extract, <4 x i32> %1, <4 x i64> %__v1, i32 2) + // X64-NEXT: ret void return _mm256_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); } void test_mm_i32scatter_ps(float *__addr, __m128i __index, __m128 __v1) { - // CHECK-LABEL: @test_mm_i32scatter_ps - // CHECK: @llvm.x86.avx512.mask.scattersiv4.sf + // X64-LABEL: test_mm_i32scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %0, <4 x i1> , <4 x i32> %1, <4 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm_i32scatter_ps(__addr,__index,__v1,2); } void test_mm_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) { - // CHECK-LABEL: @test_mm_mask_i32scatter_ps - // CHECK: @llvm.x86.avx512.mask.scattersiv4.sf + // X64-LABEL: test_mm_mask_i32scatter_ps + // X64: entry: + // X64-NEXT: %0 = bitcast float* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %0, <4 x i1> %extract, <4 x i32> %1, <4 x float> %__v1, i32 2) + // X64-NEXT: ret void return _mm_mask_i32scatter_ps(__addr,__mask,__index,__v1,2); } void test_mm_i32scatter_epi32(int *__addr, __m128i __index, __m128i __v1) { - // CHECK-LABEL: @test_mm_i32scatter_epi32 - // CHECK: @llvm.x86.avx512.mask.scattersiv4.si + // X64-LABEL: test_mm_i32scatter_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i32* %__addr to i8* + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__v1 to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %0, <4 x i1> , <4 x i32> %1, <4 x 
i32> %2, i32 2)
+ // X64-NEXT: ret void
 return _mm_i32scatter_epi32(__addr,__index,__v1,2);
 }
 void test_mm_mask_i32scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
- // CHECK-LABEL: @test_mm_mask_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.mask.scattersiv4.si
+ // X64-LABEL: test_mm_mask_i32scatter_epi32
+ // X64: entry:
+ // X64-NEXT: %0 = bitcast i32* %__addr to i8*
+ // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32>
+ // X64-NEXT: %2 = bitcast <2 x i64> %__v1 to <4 x i32>
+ // X64-NEXT: %3 = bitcast i8 %__mask to <8 x i1>
+ // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %0, <4 x i1> %extract, <4 x i32> %1, <4 x i32> %2, i32 2)
+ // X64-NEXT: ret void
 return _mm_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2);
 }
 void test_mm256_i32scatter_ps(float *__addr, __m256i __index, __m256 __v1) {
- // CHECK-LABEL: @test_mm256_i32scatter_ps
- // CHECK: @llvm.x86.avx512.mask.scattersiv8.sf
+ // X64-LABEL: test_mm256_i32scatter_ps
+ // X64: entry:
+ // X64-NEXT: %0 = bitcast float* %__addr to i8*
+ // X64-NEXT: %1 = bitcast <4 x i64> %__index to <8 x i32>
+ // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %1, <8 x float> %__v1, i32 2)
+ // X64-NEXT: ret void
 return _mm256_i32scatter_ps(__addr,__index,__v1,2);
 }
 void test_mm256_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m256 __v1) {
- // CHECK-LABEL: @test_mm256_mask_i32scatter_ps
- // CHECK: @llvm.x86.avx512.mask.scattersiv8.sf
+ // X64-LABEL: test_mm256_mask_i32scatter_ps
+ // X64: entry:
+ // X64-NEXT: %0 = bitcast float* %__addr to i8*
+ // X64-NEXT: %1 = bitcast <4 x i64> %__index to <8 x i32>
+ // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1>
+ // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %0, <8 x i1> %2, <8 x i32> %1, <8 x float> %__v1, i32 2)
+ // X64-NEXT: ret void
 return _mm256_mask_i32scatter_ps(__addr,__mask,__index,__v1,2);
 }
 void test_mm256_i32scatter_epi32(int *__addr, __m256i __index, __m256i __v1) {
- // CHECK-LABEL: @test_mm256_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.mask.scattersiv8.si
+ // X64-LABEL: test_mm256_i32scatter_epi32
+ // X64: entry:
+ // X64-NEXT: %0 = bitcast i32* %__addr to i8*
+ // X64-NEXT: %1 = bitcast <4 x i64> %__index to <8 x i32>
+ // X64-NEXT: %2 = bitcast <4 x i64> %__v1 to <8 x i32>
+ // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %1, <8 x i32> %2, i32 2)
+ // X64-NEXT: ret void
 return _mm256_i32scatter_epi32(__addr,__index,__v1,2);
 }
 void test_mm256_mask_i32scatter_epi32(int *__addr, __mmask8 __mask, __m256i __index, __m256i __v1) {
- // CHECK-LABEL: @test_mm256_mask_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.mask.scattersiv8.si
+ // X64-LABEL: test_mm256_mask_i32scatter_epi32
+ // X64: entry:
+ // X64-NEXT: %0 = bitcast i32* %__addr to i8*
+ // X64-NEXT: %1 = bitcast <4 x i64> %__index to <8 x i32>
+ // X64-NEXT: %2 = bitcast <4 x i64> %__v1 to <8 x i32>
+ // X64-NEXT: %3 = bitcast i8 %__mask to <8 x i1>
+ // X64-NEXT: tail call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %0, <8 x i1> %3, <8 x i32> %1, <8 x i32> %2, i32 2)
+ // X64-NEXT: ret void
 return _mm256_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2);
 }
 __m128d test_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
- // CHECK-LABEL: @test_mm_mask_sqrt_pd
- // CHECK: @llvm.sqrt.v2f64
- // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+ // X64-LABEL: test_mm_mask_sqrt_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #9
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__W
+ // X64-NEXT: ret <2 x double> %2
 return _mm_mask_sqrt_pd(__W,__U,__A);
 }
 __m128d test_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
- // CHECK-LABEL: @test_mm_maskz_sqrt_pd
- // CHECK: @llvm.sqrt.v2f64
- // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+ // X64-LABEL: test_mm_maskz_sqrt_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #9
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer
+ // X64-NEXT: ret <2 x double> %2
 return _mm_maskz_sqrt_pd(__U,__A);
 }
 __m256d test_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
- // CHECK-LABEL: @test_mm256_mask_sqrt_pd
- // CHECK: @llvm.sqrt.v4f64
- // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ // X64-LABEL: test_mm256_mask_sqrt_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #9
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W
+ // X64-NEXT: ret <4 x double> %2
 return _mm256_mask_sqrt_pd(__W,__U,__A);
 }
 __m256d test_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
- // CHECK-LABEL: @test_mm256_maskz_sqrt_pd
- // CHECK: @llvm.sqrt.v4f64
- // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ // X64-LABEL: test_mm256_maskz_sqrt_pd
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #9
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer
+ // X64-NEXT: ret <4 x double> %2
 return _mm256_maskz_sqrt_pd(__U,__A);
 }
 __m128 test_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
- // CHECK-LABEL: @test_mm_mask_sqrt_ps
- // CHECK: @llvm.sqrt.v4f32
- // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ // X64-LABEL: test_mm_mask_sqrt_ps
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #9
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__W
+ // X64-NEXT: ret <4 x float> %2
 return _mm_mask_sqrt_ps(__W,__U,__A);
 }
 __m128 test_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
- // CHECK-LABEL: @test_mm_maskz_sqrt_ps
- // CHECK: @llvm.sqrt.v4f32
- // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ // X64-LABEL: test_mm_maskz_sqrt_ps
+ // X64: entry:
+ // X64-NEXT: %0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #9
+ // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+ // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x float> %0,
<4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_sqrt_ps(__U,__A); } __m256 test_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_sqrt_ps - // CHECK: @llvm.sqrt.v8f32 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_sqrt_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__W + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_sqrt_ps(__W,__U,__A); } __m256 test_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_sqrt_ps - // CHECK: @llvm.sqrt.v8f32 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_sqrt_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_sqrt_ps(__U,__A); } __m128d test_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_sub_pd - // CHECK: fsub <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_sub_pd + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <2 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %sub.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_sub_pd(__W,__U,__A,__B); } __m128d test_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_sub_pd - // CHECK: fsub <2 x double> %{{.*}}, %{{.*}} - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_sub_pd + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <2 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %sub.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_sub_pd(__U,__A,__B); } __m256d test_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_sub_pd - // CHECK: fsub <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_sub_pd + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <4 x double> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %sub.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_sub_pd(__W,__U,__A,__B); } __m256d test_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_sub_pd - // CHECK: fsub <4 x double> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_sub_pd + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <4 x double> %__A, %__B + // X64-NEXT: %0 = 
bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %sub.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_sub_pd(__U,__A,__B); } __m128 test_mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_sub_ps - // CHECK: fsub <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_sub_ps + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %sub.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_sub_ps(__W,__U,__A,__B); } __m128 test_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_sub_ps - // CHECK: fsub <4 x float> %{{.*}}, %{{.*}} - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_sub_ps + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <4 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %sub.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_sub_ps(__U,__A,__B); } __m256 test_mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_sub_ps - // CHECK: fsub <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_sub_ps + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %sub.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_sub_ps(__W,__U,__A,__B); } __m256 test_mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_sub_ps - // CHECK: fsub <8 x float> %{{.*}}, %{{.*}} - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_sub_ps + // X64: entry: + // X64-NEXT: %sub.i.i = fsub <8 x float> %__A, %__B + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %sub.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_sub_ps(__U,__A,__B); } __m128i test_mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { - // CHECK-LABEL: @test_mm_mask2_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask2_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) #9 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %1 + // X64-NEXT: %6 = bitcast <4 
x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask2_permutex2var_epi32(__A,__I,__U,__B); } __m256i test_mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask2_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask2_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2) #9 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %1 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask2_permutex2var_epi32(__A,__I,__U,__B); } __m128d test_mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { - // CHECK-LABEL: @test_mm_mask2_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask2_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %__A, <2 x i64> %__I, <2 x double> %__B) #9 + // X64-NEXT: %1 = bitcast <2 x i64> %__I to <2 x double> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> + // X64-NEXT: %3 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %1 + // X64-NEXT: ret <2 x double> %3 return _mm_mask2_permutex2var_pd(__A,__I,__U,__B); } __m256d test_mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask2_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask2_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %__A, <4 x i64> %__I, <4 x double> %__B) #9 + // X64-NEXT: %1 = bitcast <4 x i64> %__I to <4 x double> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %1 + // X64-NEXT: ret <4 x double> %3 return _mm256_mask2_permutex2var_pd(__A,__I,__U,__B); } __m128 test_mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { - // CHECK-LABEL: @test_mm_mask2_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask2_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %__A, <4 x i32> %0, <4 x float> %__B) #9 + // X64-NEXT: %2 = bitcast <2 x i64> %__I to <4 x float> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x float> %1, <4 x float> %2 + // X64-NEXT: ret <4 x float> %4 return _mm_mask2_permutex2var_ps(__A,__I,__U,__B); } __m256 
test_mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask2_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask2_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %__A, <8 x i32> %0, <8 x float> %__B) #9 + // X64-NEXT: %2 = bitcast <4 x i64> %__I to <8 x float> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x float> %1, <8 x float> %2 + // X64-NEXT: ret <8 x float> %4 return _mm256_mask2_permutex2var_ps(__A,__I,__U,__B); } __m128i test_mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { - // CHECK-LABEL: @test_mm_mask2_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask2_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__I + // X64-NEXT: ret <2 x i64> %2 return _mm_mask2_permutex2var_epi64(__A,__I,__U,__B); } __m256i test_mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask2_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask2_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__I + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask2_permutex2var_epi64(__A,__I,__U,__B); } __m128i test_mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.128 + // X64-LABEL: test_mm_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) #9 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_permutex2var_epi32(__A,__I,__B); } __m128i test_mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %0, <4 x i32> %1, <4 x 
i32> %2) #9 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %0 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_permutex2var_epi32(__A,__U,__I,__B); } __m128i test_mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) #9 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_permutex2var_epi32(__U,__A,__I,__B); } __m256i test_mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.256 + // X64-LABEL: test_mm256_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2) #9 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_permutex2var_epi32(__A,__I,__B); } __m256i test_mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2) #9 + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_permutex2var_epi32(__A,__U,__I,__B); } __m256i test_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi32 - // CHECK: @llvm.x86.avx512.vpermi2var.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutex2var_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2) #9 + // X64-NEXT: %4 = bitcast i8 %__U to <8 
x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_permutex2var_epi32(__U,__A,__I,__B); } __m128d test_mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { - // CHECK-LABEL: @test_mm_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.128 + // X64-LABEL: test_mm_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %__A, <2 x i64> %__I, <2 x double> %__B) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_permutex2var_pd(__A,__I,__B); } __m128d test_mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %__A, <2 x i64> %__I, <2 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__A + // X64-NEXT: ret <2 x double> %2 return _mm_mask_permutex2var_pd(__A,__U,__I,__B); } __m128d test_mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %__A, <2 x i64> %__I, <2 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_permutex2var_pd(__U,__A,__I,__B); } __m256d test_mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { - // CHECK-LABEL: @test_mm256_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.256 + // X64-LABEL: test_mm256_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %__A, <4 x i64> %__I, <4 x double> %__B) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_permutex2var_pd(__A,__I,__B); } __m256d test_mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %__A, <4 x i64> %__I, <4 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__A + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_permutex2var_pd(__A,__U,__I,__B); } __m256d test_mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B) { - // CHECK-LABEL: 
@test_mm256_maskz_permutex2var_pd - // CHECK: @llvm.x86.avx512.vpermi2var.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutex2var_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %__A, <4 x i64> %__I, <4 x double> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_permutex2var_pd(__U,__A,__I,__B); } __m128 test_mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { - // CHECK-LABEL: @test_mm_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.128 + // X64-LABEL: test_mm_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %__A, <4 x i32> %0, <4 x float> %__B) #9 + // X64-NEXT: ret <4 x float> %1 return _mm_permutex2var_ps(__A,__I,__B); } __m128 test_mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %__A, <4 x i32> %0, <4 x float> %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x float> %1, <4 x float> %__A + // X64-NEXT: ret <4 x float> %3 return _mm_mask_permutex2var_ps(__A,__U,__I,__B); } __m128 test_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__I to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %__A, <4 x i32> %0, <4 x float> %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x float> %1, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %3 return _mm_maskz_permutex2var_ps(__U,__A,__I,__B); } __m256 test_mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { - // CHECK-LABEL: @test_mm256_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.256 + // X64-LABEL: test_mm256_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %__A, <8 x i32> %0, <8 x float> %__B) #9 + // X64-NEXT: ret <8 x float> %1 return _mm256_permutex2var_ps(__A,__I,__B); } __m256 test_mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // 
X64-LABEL: test_mm256_mask_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %__A, <8 x i32> %0, <8 x float> %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %__A + // X64-NEXT: ret <8 x float> %3 return _mm256_mask_permutex2var_ps(__A,__U,__I,__B); } __m256 test_mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_permutex2var_ps - // CHECK: @llvm.x86.avx512.vpermi2var.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutex2var_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__I to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %__A, <8 x i32> %0, <8 x float> %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %3 return _mm256_maskz_permutex2var_ps(__U,__A,__I,__B); } __m128i test_mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.128 + // X64-LABEL: test_mm_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) #9 + // X64-NEXT: ret <2 x i64> %0 return _mm_permutex2var_epi64(__A,__I,__B); } __m128i test_mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__A + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_permutex2var_epi64(__A,__U,__I,__B); } __m128i test_mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_permutex2var_epi64(__U,__A,__I,__B); } __m256i test_mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.256 + // X64-LABEL: test_mm256_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) #9 + // X64-NEXT: ret <4 x i64> %0 return 
_mm256_permutex2var_epi64(__A,__I,__B); } __m256i test_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__A + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_permutex2var_epi64(__A,__U,__I,__B); } __m256i test_mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi64 - // CHECK: @llvm.x86.avx512.vpermi2var.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutex2var_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_permutex2var_epi64(__U,__A,__I,__B); } __m128i test_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi8_epi32 - // CHECK: sext <4 x i8> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepi8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i8> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_cvtepi8_epi32(__W, __U, __A); } __m128i test_mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi8_epi32 - // CHECK: sext <4 x i8> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepi8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i8> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_cvtepi8_epi32(__U, __A); } __m256i test_mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi8_epi32 - // CHECK: sext <8 x i8> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> 
%{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepi8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %conv.i.i, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_cvtepi8_epi32(__W, __U, __A); } __m256i test_mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi8_epi32 - // CHECK: sext <8 x i8> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvtepi8_epi32(__U, __A); } __m128i test_mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi8_epi64 - // CHECK: sext <2 x i8> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepi8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sext <2 x i8> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepi8_epi64(__W, __U, __A); } __m128i test_mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi8_epi64 - // CHECK: sext <2 x i8> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepi8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sext <2 x i8> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepi8_epi64(__U, __A); } __m256i test_mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi8_epi64 - // CHECK: sext <4 x i8> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepi8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i8> %shuffle.i.i to <4 x i64> + // X64-NEXT: 
%1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvtepi8_epi64(__W, __U, __A); } __m256i test_mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi8_epi64 - // CHECK: sext <4 x i8> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i8> %shuffle.i.i to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_cvtepi8_epi64(__U, __A); } __m128i test_mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { - // CHECK-LABEL: @test_mm_mask_cvtepi32_epi64 - // CHECK: sext <2 x i32> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepi32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sext <2 x i32> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepi32_epi64(__W, __U, __X); } __m128i test_mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { - // CHECK-LABEL: @test_mm_maskz_cvtepi32_epi64 - // CHECK: sext <2 x i32> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepi32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sext <2 x i32> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepi32_epi64(__U, __X); } __m256i test_mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { - // CHECK-LABEL: @test_mm256_mask_cvtepi32_epi64 - // CHECK: sext <4 x i32> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepi32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i32> %0 to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvtepi32_epi64(__W, __U, __X); } __m256i test_mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { - // CHECK-LABEL: 
@test_mm256_maskz_cvtepi32_epi64 - // CHECK: sext <4 x i32> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i32> %0 to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_cvtepi32_epi64(__U, __X); } __m128i test_mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi16_epi32 - // CHECK: sext <4 x i16> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepi16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i16> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_cvtepi16_epi32(__W, __U, __A); } __m128i test_mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi16_epi32 - // CHECK: sext <4 x i16> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepi16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i16> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_cvtepi16_epi32(__U, __A); } __m256i test_mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi16_epi32 - // CHECK: sext <8 x i16> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepi16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i = sext <8 x i16> %0 to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %conv.i.i, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_cvtepi16_epi32(__W, __U, __A); } __m256i test_mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi16_epi32 - // CHECK: sext <8 x i16> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i 
= sext <8 x i16> %0 to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvtepi16_epi32(__U, __A); } __m128i test_mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi16_epi64 - // CHECK: sext <2 x i16> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepi16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sext <2 x i16> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepi16_epi64(__W, __U, __A); } __m128i test_mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi16_epi64 - // CHECK: sext <2 x i16> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepi16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> + // X64-NEXT: %conv.i.i = sext <2 x i16> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepi16_epi64(__U, __A); } __m256i test_mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi16_epi64 - // CHECK: sext <4 x i16> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepi16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i16> %shuffle.i.i to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvtepi16_epi64(__W, __U, __A); } __m256i test_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi16_epi64 - // CHECK: sext <4 x i16> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = sext <4 x i16> %shuffle.i.i to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return 
_mm256_maskz_cvtepi16_epi64(__U, __A); } __m128i test_mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepu8_epi32 - // CHECK: zext <4 x i8> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepu8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i8> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_cvtepu8_epi32(__W, __U, __A); } __m128i test_mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepu8_epi32 - // CHECK: zext <4 x i8> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepu8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i8> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_cvtepu8_epi32(__U, __A); } __m256i test_mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepu8_epi32 - // CHECK: zext <8 x i8> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepu8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %conv.i.i, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_cvtepu8_epi32(__W, __U, __A); } __m256i test_mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu8_epi32 - // CHECK: zext <8 x i8> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepu8_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // X64-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvtepu8_epi32(__U, __A); } __m128i test_mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepu8_epi64 - // 
CHECK: zext <2 x i8> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepu8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <2 x i32> + // X64-NEXT: %conv.i.i = zext <2 x i8> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepu8_epi64(__W, __U, __A); } __m128i test_mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepu8_epi64 - // CHECK: zext <2 x i8> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepu8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <2 x i32> + // X64-NEXT: %conv.i.i = zext <2 x i8> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepu8_epi64(__U, __A); } __m256i test_mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepu8_epi64 - // CHECK: zext <4 x i8> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepu8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i8> %shuffle.i.i to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvtepu8_epi64(__W, __U, __A); } __m256i test_mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu8_epi64 - // CHECK: zext <4 x i8> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepu8_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // X64-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i8> %shuffle.i.i to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_cvtepu8_epi64(__U, __A); } __m128i test_mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { - // CHECK-LABEL: @test_mm_mask_cvtepu32_epi64 - // CHECK: zext <2 x i32> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepu32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x 
i32> + // X64-NEXT: %conv.i.i = zext <2 x i32> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepu32_epi64(__W, __U, __X); } __m128i test_mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { - // CHECK-LABEL: @test_mm_maskz_cvtepu32_epi64 - // CHECK: zext <2 x i32> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepu32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> + // X64-NEXT: %conv.i.i = zext <2 x i32> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepu32_epi64(__U, __X); } __m256i test_mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { - // CHECK-LABEL: @test_mm256_mask_cvtepu32_epi64 - // CHECK: zext <4 x i32> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepu32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i32> %0 to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvtepu32_epi64(__W, __U, __X); } __m256i test_mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu32_epi64 - // CHECK: zext <4 x i32> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepu32_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i32> %0 to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_cvtepu32_epi64(__U, __X); } __m128i test_mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepu16_epi32 - // CHECK: zext <4 x i16> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepu16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i16> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_cvtepu16_epi32(__W, __U, __A); } __m128i 
test_mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepu16_epi32 - // CHECK: zext <4 x i16> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepu16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i16> %shuffle.i.i to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_cvtepu16_epi32(__U, __A); } __m256i test_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepu16_epi32 - // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepu16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i = zext <8 x i16> %0 to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %conv.i.i, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_cvtepu16_epi32(__W, __U, __A); } __m256i test_mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu16_epi32 - // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepu16_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %conv.i.i = zext <8 x i16> %0 to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvtepu16_epi32(__U, __A); } __m128i test_mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepu16_epi64 - // CHECK: zext <2 x i16> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_cvtepu16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> + // X64-NEXT: %conv.i.i = zext <2 x i16> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepu16_epi64(__W, __U, __A); } __m128i test_mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepu16_epi64 - // CHECK: zext <2 x i16> %{{.*}} to <2 x i64> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_cvtepu16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> + // 
X64-NEXT: %conv.i.i = zext <2 x i16> %shuffle.i.i to <2 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %conv.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepu16_epi64(__U, __A); } __m256i test_mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepu16_epi64 - // CHECK: zext <4 x i16> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepu16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i16> %shuffle.i.i to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_cvtepu16_epi64(__W, __U, __A); } __m256i test_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu16_epi64 - // CHECK: zext <4 x i16> %{{.*}} to <4 x i64> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepu16_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> + // X64-NEXT: %conv.i.i = zext <4 x i16> %shuffle.i.i to <4 x i64> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %conv.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_cvtepu16_epi64(__U, __A); } __m128i test_mm_rol_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm_rol_epi32 - // CHECK: @llvm.fshl.v4i32 + // X64-LABEL: test_mm_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_rol_epi32(__A, 5); } __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_rol_epi32 - // CHECK: @llvm.fshl.v4i32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_rol_epi32(__W, __U, __A, 5); } __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_rol_epi32 - // CHECK: @llvm.fshl.v4i32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> 
%__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_rol_epi32(__U, __A, 5); } __m256i test_mm256_rol_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm256_rol_epi32 - // CHECK: @llvm.fshl.v8i32 + // X64-LABEL: test_mm256_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_rol_epi32(__A, 5); } __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_rol_epi32 - // CHECK: @llvm.fshl.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_rol_epi32(__W, __U, __A, 5); } __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_rol_epi32 - // CHECK: @llvm.fshl.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_rol_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_rol_epi32(__U, __A, 5); } __m128i test_mm_rol_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm_rol_epi64 - // CHECK: @llvm.fshl.v2i64 + // X64-LABEL: test_mm_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) + // X64-NEXT: ret <2 x i64> %0 return _mm_rol_epi64(__A, 5); } __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_rol_epi64 - // CHECK: @llvm.fshl.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_rol_epi64(__W, __U, __A, 5); } __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_rol_epi64 - // CHECK: @llvm.fshl.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_rol_epi64 + 
// X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_rol_epi64(__U, __A, 5); } __m256i test_mm256_rol_epi64(__m256i __A) { - // CHECK-LABEL: @test_mm256_rol_epi64 - // CHECK: @llvm.fshl.v4i64 + // X64-LABEL: test_mm256_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) + // X64-NEXT: ret <4 x i64> %0 return _mm256_rol_epi64(__A, 5); } __m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_rol_epi64 - // CHECK: @llvm.fshl.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_rol_epi64(__W, __U, __A, 5); } __m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_rol_epi64 - // CHECK: @llvm.fshl.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_rol_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_rol_epi64(__U, __A, 5); } __m128i test_mm_rolv_epi32(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_rolv_epi32 - // CHECK: llvm.fshl.v4i32 + // X64-LABEL: test_mm_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_rolv_epi32(__A, __B); } __m128i test_mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_rolv_epi32 - // CHECK: llvm.fshl.v4i32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_rolv_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { 
- // CHECK-LABEL: @test_mm_maskz_rolv_epi32 - // CHECK: llvm.fshl.v4i32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_rolv_epi32(__U, __A, __B); } __m256i test_mm256_rolv_epi32(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_rolv_epi32 - // CHECK: @llvm.fshl.v8i32 + // X64-LABEL: test_mm256_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_rolv_epi32(__A, __B); } __m256i test_mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_rolv_epi32 - // CHECK: @llvm.fshl.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_rolv_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_rolv_epi32 - // CHECK: @llvm.fshl.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_rolv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_rolv_epi32(__U, __A, __B); } __m128i test_mm_rolv_epi64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_rolv_epi64 - // CHECK: @llvm.fshl.v2i64 + // X64-LABEL: test_mm_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: ret <2 x i64> %0 return _mm_rolv_epi64(__A, __B); } __m128i test_mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_rolv_epi64 - // CHECK: @llvm.fshl.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x 
i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_rolv_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_rolv_epi64 - // CHECK: @llvm.fshl.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_rolv_epi64(__U, __A, __B); } __m256i test_mm256_rolv_epi64(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_rolv_epi64 - // CHECK: @llvm.fshl.v4i64 + // X64-LABEL: test_mm256_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) #9 + // X64-NEXT: ret <4 x i64> %0 return _mm256_rolv_epi64(__A, __B); } __m256i test_mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_rolv_epi64 - // CHECK: @llvm.fshl.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_rolv_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_rolv_epi64 - // CHECK: @llvm.fshl.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_rolv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_rolv_epi64(__U, __A, __B); } __m128i test_mm_ror_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm_ror_epi32 - // CHECK: @llvm.fshr.v4i32 + // X64-LABEL: test_mm_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_ror_epi32(__A, 5); } __m128i test_mm_mask_ror_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_ror_epi32 - // CHECK: @llvm.fshr.v4i32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x 
i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_ror_epi32(__W, __U, __A, 5); } __m128i test_mm_maskz_ror_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_ror_epi32 - // CHECK: @llvm.fshr.v4i32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_ror_epi32(__U, __A, 5); } __m256i test_mm256_ror_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm256_ror_epi32 - // CHECK: @llvm.fshr.v8i32 + // X64-LABEL: test_mm256_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_ror_epi32(__A, 5); } __m256i test_mm256_mask_ror_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_ror_epi32 - // CHECK: @llvm.fshr.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_ror_epi32(__W, __U, __A, 5); } __m256i test_mm256_maskz_ror_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_ror_epi32 - // CHECK: @llvm.fshr.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_ror_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_ror_epi32(__U, __A, 5); } __m128i test_mm_ror_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm_ror_epi64 - // CHECK: @llvm.fshr.v2i64 + // X64-LABEL: test_mm_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) + // X64-NEXT: ret <2 x i64> %0 return _mm_ror_epi64(__A, 5); } __m128i test_mm_mask_ror_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: 
@test_mm_mask_ror_epi64 - // CHECK: @llvm.fshr.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_ror_epi64(__W, __U, __A, 5); } __m128i test_mm_maskz_ror_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_ror_epi64 - // CHECK: @llvm.fshr.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_ror_epi64(__U, __A, 5); } __m256i test_mm256_ror_epi64(__m256i __A) { - // CHECK-LABEL: @test_mm256_ror_epi64 - // CHECK: @llvm.fshr.v4i64 + // X64-LABEL: test_mm256_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) + // X64-NEXT: ret <4 x i64> %0 return _mm256_ror_epi64(__A, 5); } __m256i test_mm256_mask_ror_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_ror_epi64 - // CHECK: @llvm.fshr.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_ror_epi64(__W, __U, __A,5); } __m256i test_mm256_maskz_ror_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_ror_epi64 - // CHECK: @llvm.fshr.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_ror_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_ror_epi64(__U, __A, 5); } __m128i test_mm_rorv_epi32(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_rorv_epi32 - // CHECK: @llvm.fshr.v4i32 + // X64-LABEL: test_mm_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_rorv_epi32(__A, __B); } __m128i test_mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_rorv_epi32 - // CHECK: @llvm.fshr.v4i32 - 
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_rorv_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_rorv_epi32 - // CHECK: @llvm.fshr.v4i32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_rorv_epi32(__U, __A, __B); } __m256i test_mm256_rorv_epi32(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_rorv_epi32 - // CHECK: @llvm.fshr.v8i32 + // X64-LABEL: test_mm256_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_rorv_epi32(__A, __B); } __m256i test_mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_rorv_epi32 - // CHECK: @llvm.fshr.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_rorv_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_rorv_epi32 - // CHECK: @llvm.fshr.v8i32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_rorv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> 
zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_rorv_epi32(__U, __A, __B); } __m128i test_mm_rorv_epi64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_rorv_epi64 - // CHECK: @llvm.fshr.v2i64 + // X64-LABEL: test_mm_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: ret <2 x i64> %0 return _mm_rorv_epi64(__A, __B); } __m128i test_mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_rorv_epi64 - // CHECK: @llvm.fshr.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_rorv_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_rorv_epi64 - // CHECK: @llvm.fshr.v2i64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_rorv_epi64(__U, __A, __B); } __m256i test_mm256_rorv_epi64(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_rorv_epi64 - // CHECK: @llvm.fshr.v4i64 + // X64-LABEL: test_mm256_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) #9 + // X64-NEXT: ret <4 x i64> %0 return _mm256_rorv_epi64(__A, __B); } __m256i test_mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_rorv_epi64 - // CHECK: @llvm.fshr.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_rorv_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_rorv_epi64 - // CHECK: @llvm.fshr.v4i64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_rorv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // 
X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_rorv_epi64(__U, __A, __B); } __m128i test_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_mask_sllv_epi64 - // CHECK: @llvm.x86.avx2.psllv.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_sllv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %__X, <2 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_sllv_epi64(__W, __U, __X, __Y); } __m128i test_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_maskz_sllv_epi64 - // CHECK: @llvm.x86.avx2.psllv.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_sllv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %__X, <2 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_sllv_epi64(__U, __X, __Y); } __m256i test_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_sllv_epi64 - // CHECK: @llvm.x86.avx2.psllv.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_sllv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %__X, <4 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_sllv_epi64(__W, __U, __X, __Y); } __m256i test_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_sllv_epi64 - // CHECK: @llvm.x86.avx2.psllv.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_sllv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %__X, <4 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_sllv_epi64(__U, __X, __Y); } __m128i test_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_mask_sllv_epi32 - // CHECK: @llvm.x86.avx2.psllv.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_sllv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__Y to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 
x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_sllv_epi32(__W, __U, __X, __Y); } __m128i test_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_maskz_sllv_epi32 - // CHECK: @llvm.x86.avx2.psllv.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_sllv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__Y to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_sllv_epi32(__U, __X, __Y); } __m256i test_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_sllv_epi32 - // CHECK: @llvm.x86.avx2.psllv.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_sllv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_sllv_epi32(__W, __U, __X, __Y); } __m256i test_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_sllv_epi32 - // CHECK: @llvm.x86.avx2.psllv.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_sllv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_sllv_epi32(__U, __X, __Y); } __m128i test_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_mask_srlv_epi64 - // CHECK: @llvm.x86.avx2.psrlv.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_srlv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %__X, <2 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_srlv_epi64(__W, __U, __X, __Y); } __m128i test_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_maskz_srlv_epi64 - // CHECK: @llvm.x86.avx2.psrlv.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: 
test_mm_maskz_srlv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %__X, <2 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_srlv_epi64(__U, __X, __Y); } __m256i test_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_srlv_epi64 - // CHECK: @llvm.x86.avx2.psrlv.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_srlv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %__X, <4 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_srlv_epi64(__W, __U, __X, __Y); } __m256i test_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_srlv_epi64 - // CHECK: @llvm.x86.avx2.psrlv.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_srlv_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %__X, <4 x i64> %__Y) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_srlv_epi64(__U, __X, __Y); } __m128i test_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_mask_srlv_epi32 - // CHECK: @llvm.x86.avx2.psrlv.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_srlv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__Y to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_srlv_epi32(__W, __U, __X, __Y); } __m128i test_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_maskz_srlv_epi32 - // CHECK: @llvm.x86.avx2.psrlv.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_srlv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__Y to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return 
_mm_maskz_srlv_epi32(__U, __X, __Y); } __m256i test_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_srlv_epi32 - // CHECK: @llvm.x86.avx2.psrlv.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_srlv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_srlv_epi32(__W, __U, __X, __Y); } __m256i test_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_srlv_epi32 - // CHECK: @llvm.x86.avx2.psrlv.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_srlv_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_srlv_epi32(__U, __X, __Y); } __m128i test_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_srl_epi32 - // CHECK: @llvm.x86.sse2.psrl.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_srl_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_srl_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_srl_epi32 - // CHECK: @llvm.x86.sse2.psrl.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_srl_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_srl_epi32(__U, __A, __B); } __m256i test_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_srl_epi32 - // CHECK: @llvm.x86.avx2.psrl.d - // CHECK: select 
<8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_srl_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_srl_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_srl_epi32 - // CHECK: @llvm.x86.avx2.psrl.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_srl_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_srl_epi32(__U, __A, __B); } __m128i test_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_srli_epi32 - // CHECK: @llvm.x86.sse2.psrli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_srli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = lshr <4 x i32> %0, + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_srli_epi32(__W, __U, __A, 5); } __m128i test_mm_mask_srli_epi32_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_srli_epi32_2 - // CHECK: @llvm.x86.sse2.psrli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_srli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_srli_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_srli_epi32 - // CHECK: @llvm.x86.sse2.psrli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_srli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = lshr <4 x i32> %0, + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> 
%1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_srli_epi32(__U, __A, 5); } __m128i test_mm_maskz_srli_epi32_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_srli_epi32_2 - // CHECK: @llvm.x86.sse2.psrli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_srli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_srli_epi32(__U, __A, __B); } __m256i test_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_srli_epi32 - // CHECK: @llvm.x86.avx2.psrli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_srli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = lshr <8 x i32> %0, + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_srli_epi32(__W, __U, __A, 5); } __m256i test_mm256_mask_srli_epi32_2(__m256i __W, __mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_srli_epi32_2 - // CHECK: @llvm.x86.avx2.psrli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_srli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_srli_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_srli_epi32 - // CHECK: @llvm.x86.avx2.psrli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_srli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = lshr <8 x i32> %0, + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_srli_epi32(__U, __A, 5); } __m256i test_mm256_maskz_srli_epi32_2(__mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_maskz_srli_epi32_2 - // CHECK: @llvm.x86.avx2.psrli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_srli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = 
select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_srli_epi32(__U, __A, __B); } __m128i test_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_srl_epi64 - // CHECK: @llvm.x86.sse2.psrl.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_srl_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_srl_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_srl_epi64 - // CHECK: @llvm.x86.sse2.psrl.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_srl_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_srl_epi64(__U, __A, __B); } __m256i test_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_srl_epi64 - // CHECK: @llvm.x86.avx2.psrl.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_srl_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_srl_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_srl_epi64 - // CHECK: @llvm.x86.avx2.psrl.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_srl_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_srl_epi64(__U, __A, __B); } __m128i test_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_srli_epi64 - // CHECK: @llvm.x86.sse2.psrli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_srli_epi64 + // X64: entry: + // X64-NEXT: %0 = lshr <2 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_srli_epi64(__W, __U, __A, 5); } __m128i 
test_mm_mask_srli_epi64_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_srli_epi64_2 - // CHECK: @llvm.x86.sse2.psrli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_srli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_srli_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_srli_epi64 - // CHECK: @llvm.x86.sse2.psrli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_srli_epi64 + // X64: entry: + // X64-NEXT: %0 = lshr <2 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_srli_epi64(__U, __A, 5); } __m128i test_mm_maskz_srli_epi64_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_srli_epi64_2 - // CHECK: @llvm.x86.sse2.psrli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_srli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_srli_epi64(__U, __A, __B); } __m256i test_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_srli_epi64 - // CHECK: @llvm.x86.avx2.psrli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_srli_epi64 + // X64: entry: + // X64-NEXT: %0 = lshr <4 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_srli_epi64(__W, __U, __A, 5); } __m256i test_mm256_mask_srli_epi64_2(__m256i __W, __mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_srli_epi64_2 - // CHECK: @llvm.x86.avx2.psrli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_srli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_srli_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_srli_epi64 - // CHECK: @llvm.x86.avx2.psrli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_srli_epi64 + // X64: entry: + // 
X64-NEXT: %0 = lshr <4 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_srli_epi64(__U, __A, 5); } __m256i test_mm256_maskz_srli_epi64_2(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_srli_epi64_2 - // CHECK: @llvm.x86.avx2.psrli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_srli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = lshr <4 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_srli_epi64(__U, __A, 5); } __m128i test_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_sll_epi32 - // CHECK: @llvm.x86.sse2.psll.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_sll_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_sll_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_sll_epi32 - // CHECK: @llvm.x86.sse2.psll.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_sll_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_sll_epi32(__U, __A, __B); } __m256i test_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_sll_epi32 - // CHECK: @llvm.x86.avx2.psll.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_sll_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_sll_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_sll_epi32(__mmask8 __U, 
__m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_sll_epi32 - // CHECK: @llvm.x86.avx2.psll.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_sll_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_sll_epi32(__U, __A, __B); } __m128i test_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_slli_epi32 - // CHECK: @llvm.x86.sse2.pslli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_slli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = shl <4 x i32> %0, + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_slli_epi32(__W, __U, __A, 5); } __m128i test_mm_mask_slli_epi32_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_slli_epi32_2 - // CHECK: @llvm.x86.sse2.pslli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_slli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_slli_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_slli_epi32 - // CHECK: @llvm.x86.sse2.pslli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_slli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = shl <4 x i32> %0, + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_slli_epi32(__U, __A, 5); } __m128i test_mm_maskz_slli_epi32_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_slli_epi32_2 - // CHECK: @llvm.x86.sse2.pslli.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_slli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = 
shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_slli_epi32(__U, __A, __B); } __m256i test_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_slli_epi32 - // CHECK: @llvm.x86.avx2.pslli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_slli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = shl <8 x i32> %0, + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_slli_epi32(__W, __U, __A, 5); } __m256i test_mm256_mask_slli_epi32_2(__m256i __W, __mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_slli_epi32_2 - // CHECK: @llvm.x86.avx2.pslli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_slli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_slli_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_slli_epi32 - // CHECK: @llvm.x86.avx2.pslli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_slli_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = shl <8 x i32> %0, + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_slli_epi32(__U, __A, 5); } __m256i test_mm256_maskz_slli_epi32_2(__mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_maskz_slli_epi32_2 - // CHECK: @llvm.x86.avx2.pslli.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_slli_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_slli_epi32(__U, __A, __B); } __m128i test_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_sll_epi64 - // CHECK: @llvm.x86.sse2.psll.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_sll_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 
x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_sll_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_sll_epi64 - // CHECK: @llvm.x86.sse2.psll.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_sll_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_sll_epi64(__U, __A, __B); } __m256i test_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_sll_epi64 - // CHECK: @llvm.x86.avx2.psll.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_sll_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_sll_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_sll_epi64 - // CHECK: @llvm.x86.avx2.psll.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_sll_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_sll_epi64(__U, __A, __B); } __m128i test_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_slli_epi64 - // CHECK: @llvm.x86.sse2.pslli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_slli_epi64 + // X64: entry: + // X64-NEXT: %0 = shl <2 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_slli_epi64(__W, __U, __A, 5); } __m128i test_mm_mask_slli_epi64_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_slli_epi64_2 - // CHECK: @llvm.x86.sse2.pslli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_slli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_slli_epi64(__W, __U, __A, __B); } __m128i 
test_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_slli_epi64 - // CHECK: @llvm.x86.sse2.pslli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_slli_epi64 + // X64: entry: + // X64-NEXT: %0 = shl <2 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_slli_epi64(__U, __A, 5); } __m128i test_mm_maskz_slli_epi64_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_slli_epi64_2 - // CHECK: @llvm.x86.sse2.pslli.q - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_slli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_slli_epi64(__U, __A, __B); } __m256i test_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_slli_epi64 - // CHECK: @llvm.x86.avx2.pslli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_slli_epi64 + // X64: entry: + // X64-NEXT: %0 = shl <4 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_slli_epi64(__W, __U, __A, 5); } __m256i test_mm256_mask_slli_epi64_2(__m256i __W, __mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_slli_epi64_2 - // CHECK: @llvm.x86.avx2.pslli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_slli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_slli_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_slli_epi64 - // CHECK: @llvm.x86.avx2.pslli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_slli_epi64 + // X64: entry: + // X64-NEXT: %0 = shl <4 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_slli_epi64(__U, __A, 5); } __m256i test_mm256_maskz_slli_epi64_2(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_slli_epi64_2 - // CHECK: @llvm.x86.avx2.pslli.q - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_slli_epi64_2 + // X64: entry: + // X64-NEXT: %0 = shl <4 x i64> %__A, + // X64-NEXT: %1 = 
bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_slli_epi64(__U, __A, 5); } __m128i test_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_mask_srav_epi32 - // CHECK: @llvm.x86.avx2.psrav.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_srav_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__Y to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_srav_epi32(__W, __U, __X, __Y); } __m128i test_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_maskz_srav_epi32 - // CHECK: @llvm.x86.avx2.psrav.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_srav_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__X to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__Y to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_srav_epi32(__U, __X, __Y); } __m256i test_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_srav_epi32 - // CHECK: @llvm.x86.avx2.psrav.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_srav_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_srav_epi32(__W, __U, __X, __Y); } __m256i test_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_srav_epi32 - // CHECK: @llvm.x86.avx2.psrav.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_srav_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret 
<4 x i64> %5
   return _mm256_maskz_srav_epi32(__U, __X, __Y);
 }
 __m128i test_mm_srav_epi64(__m128i __X, __m128i __Y) {
-  // CHECK-LABEL: @test_mm_srav_epi64
-  // CHECK: @llvm.x86.avx512.psrav.q.128
+  // X64-LABEL: test_mm_srav_epi64
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %__X, <2 x i64> %__Y) #9
+  // X64-NEXT: ret <2 x i64> %0
   return _mm_srav_epi64(__X, __Y);
 }
 __m128i test_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
-  // CHECK-LABEL: @test_mm_mask_srav_epi64
-  // CHECK: @llvm.x86.avx512.psrav.q.128
-  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
+  // X64-LABEL: test_mm_mask_srav_epi64
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %__X, <2 x i64> %__Y) #9
+  // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+  // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
+  // X64-NEXT: ret <2 x i64> %2
   return _mm_mask_srav_epi64(__W, __U, __X, __Y);
 }
 __m128i test_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) {
-  // CHECK-LABEL: @test_mm_maskz_srav_epi64
-  // CHECK: @llvm.x86.avx512.psrav.q.128
-  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
+  // X64-LABEL: test_mm_maskz_srav_epi64
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %__X, <2 x i64> %__Y) #9
+  // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+  // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
+  // X64-NEXT: ret <2 x i64> %2
   return _mm_maskz_srav_epi64(__U, __X, __Y);
 }
 __m256i test_mm256_srav_epi64(__m256i __X, __m256i __Y) {
-  // CHECK-LABEL: @test_mm256_srav_epi64
-  // CHECK: @llvm.x86.avx512.psrav.q.256
+  // X64-LABEL: test_mm256_srav_epi64
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %__X, <4 x i64> %__Y) #9
+  // X64-NEXT: ret <4 x i64> %0
   return _mm256_srav_epi64(__X, __Y);
 }
 __m256i test_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) {
-  // CHECK-LABEL: @test_mm256_mask_srav_epi64
-  // CHECK: @llvm.x86.avx512.psrav.q.256
-  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+  // X64-LABEL: test_mm256_mask_srav_epi64
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %__X, <4 x i64> %__Y) #9
+  // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+  // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
+  // X64-NEXT: ret <4 x i64> %2
   return _mm256_mask_srav_epi64(__W, __U, __X, __Y);
 }
 __m256i test_mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y) {
-  // CHECK-LABEL: @test_mm256_maskz_srav_epi64
-  // CHECK: @llvm.x86.avx512.psrav.q.256
-  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+  // X64-LABEL: test_mm256_maskz_srav_epi64
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %__X, <4 x i64> %__Y) #9
+  // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1>
+  // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
+  // X64-NEXT: ret <4 x i64> %2
   return
_mm256_maskz_srav_epi64(__U, __X, __Y); } void test_mm_store_epi32(void *__P, __m128i __A) { - // CHECK-LABEL: @test_mm_store_epi32 - // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}} + // X64-LABEL: test_mm_store_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: store <2 x i64> %__A, <2 x i64>* %0, align 16, !tbaa !2 + // X64-NEXT: ret void return _mm_store_epi32(__P, __A); } void test_mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_store_epi32 - // CHECK: @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %{{.*}}, <4 x i32>* %{{.}}, i32 16, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_store_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i32>* + // X64-NEXT: %1 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %0, i32 16, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_store_epi32(__P, __U, __A); } void test_mm256_store_epi32(void *__P, __m256i __A) { - // CHECK-LABEL: @test_mm256_store_epi32 - // CHECK: store <4 x i64> %{{.*}}, <4 x i64>* %{{.*}} + // X64-LABEL: test_mm256_store_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: store <4 x i64> %__A, <4 x i64>* %0, align 32, !tbaa !2 + // X64-NEXT: ret void return _mm256_store_epi32(__P, __A); } void test_mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_store_epi32 - // CHECK: @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %{{.*}}, <8 x i32>* %{{.}}, i32 32, <8 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_store_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i32>* + // X64-NEXT: %1 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %1, <8 x i32>* %0, i32 32, <8 x i1> %2) #9 + // X64-NEXT: ret void return _mm256_mask_store_epi32(__P, __U, __A); } __m128i test_mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_mov_epi32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_mov_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_mov_epi32(__W, __U, __A); } __m128i test_mm_maskz_mov_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_mov_epi32 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_mov_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_mov_epi32(__U, __A); } __m256i test_mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i 
__A) { - // CHECK-LABEL: @test_mm256_mask_mov_epi32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_mov_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_mov_epi32(__W, __U, __A); } __m256i test_mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_mov_epi32 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_mov_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_mov_epi32(__U, __A); } __m128i test_mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_mov_epi64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_mov_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %__A, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_mov_epi64(__W, __U, __A); } __m128i test_mm_maskz_mov_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_mov_epi64 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_mov_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %__A, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_mov_epi64(__U, __A); } __m256i test_mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_mov_epi64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_mov_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %__A, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_mov_epi64(__W, __U, __A); } __m256i test_mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_mov_epi64 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_mov_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %__A, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_mov_epi64(__U, __A); } __m128i test_mm_load_epi32(void const *__P) { - // CHECK-LABEL: @test_mm_load_epi32 - // CHECK: load <2 x i64>, <2 x i64>* %{{.*}} + // X64-LABEL: test_mm_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %1 = load <2 x i64>, <2 x i64>* %0, align 16, !tbaa !2 + // X64-NEXT: 
ret <2 x i64> %1 return _mm_load_epi32(__P); } __m128i test_mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_load_epi32 - // CHECK: @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %{{.*}}, i32 16, <4 x i1> %{{.*}}, <4 x i32> %{{.*}}) + // X64-LABEL: test_mm_mask_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i32>* + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 16, <4 x i1> %extract.i, <4 x i32> %1) #9 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_load_epi32(__W, __U, __P); } __m128i test_mm_maskz_load_epi32(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_load_epi32 - // CHECK: @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %{{.*}}, i32 16, <4 x i1> %{{.*}}, <4 x i32> %{{.*}}) + // X64-LABEL: test_mm_maskz_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i32>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 16, <4 x i1> %extract.i, <4 x i32> zeroinitializer) #9 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_load_epi32(__U, __P); } __m256i test_mm256_load_epi32(void const *__P) { - // CHECK-LABEL: @test_mm256_load_epi32 - // CHECK: load <4 x i64>, <4 x i64>* %{{.*}} + // X64-LABEL: test_mm256_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = load <4 x i64>, <4 x i64>* %0, align 32, !tbaa !2 + // X64-NEXT: ret <4 x i64> %1 return _mm256_load_epi32(__P); } __m256i test_mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_load_epi32 - // CHECK: @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %{{.*}}, i32 32, <8 x i1> %{{.*}}, <8 x i32> %{{.*}}) + // X64-LABEL: test_mm256_mask_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i32>* + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %0, i32 32, <8 x i1> %2, <8 x i32> %1) #9 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_load_epi32(__W, __U, __P); } __m256i test_mm256_maskz_load_epi32(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_load_epi32 - // CHECK: @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %{{.*}}, i32 32, <8 x i1> %{{.*}}, <8 x i32> %{{.*}}) + // X64-LABEL: test_mm256_maskz_load_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i32>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %0, i32 32, <8 x i1> %1, <8 x i32> zeroinitializer) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_load_epi32(__U, __P); } __m128i test_mm_load_epi64(void const *__P) { - // CHECK-LABEL: @test_mm_load_epi64 - // CHECK: load <2 x i64>, <2 x i64>* %{{.*}} + // X64-LABEL: test_mm_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: 
%1 = load <2 x i64>, <2 x i64>* %0, align 16, !tbaa !2 + // X64-NEXT: ret <2 x i64> %1 return _mm_load_epi64(__P); } __m128i test_mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_load_epi64 - // CHECK: @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %{{.*}}, i32 16, <2 x i1> %{{.*}}, <2 x i64> %{{.*}}) + // X64-LABEL: test_mm_mask_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %0, i32 16, <2 x i1> %extract.i, <2 x i64> %__W) #9 + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_load_epi64(__W, __U, __P); } __m128i test_mm_maskz_load_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_load_epi64 - // CHECK: @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %{{.*}}, i32 16, <2 x i1> %{{.*}}, <2 x i64> %{{.*}}) + // X64-LABEL: test_mm_maskz_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %0, i32 16, <2 x i1> %extract.i, <2 x i64> zeroinitializer) #9 + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_load_epi64(__U, __P); } __m256i test_mm256_load_epi64(void const *__P) { - // CHECK-LABEL: @test_mm256_load_epi64 - // CHECK: load <4 x i64>, <4 x i64>* %{{.*}} + // X64-LABEL: test_mm256_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = load <4 x i64>, <4 x i64>* %0, align 32, !tbaa !2 + // X64-NEXT: ret <4 x i64> %1 return _mm256_load_epi64(__P); } __m256i test_mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_load_epi64 - // CHECK: @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %{{.*}}, i32 32, <4 x i1> %{{.*}}, <4 x i64> %{{.*}}) + // X64-LABEL: test_mm256_mask_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %0, i32 32, <4 x i1> %extract.i, <4 x i64> %__W) #9 + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_load_epi64(__W, __U, __P); } __m256i test_mm256_maskz_load_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_load_epi64 - // CHECK: @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %{{.*}}, i32 32, <4 x i1> %{{.*}}, <4 x i64> %{{.*}}) + // X64-LABEL: test_mm256_maskz_load_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %0, i32 32, <4 x i1> %extract.i, <4 x i64> zeroinitializer) #9 + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_load_epi64(__U, __P); } void test_mm_store_epi64(void *__P, __m128i __A) { - // CHECK-LABEL: @test_mm_store_epi64 - // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}} + // X64-LABEL: test_mm_store_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: store <2 x i64> %__A, <2 x i64>* %0, align 16, !tbaa !2 + // 
X64-NEXT: ret void return _mm_store_epi64(__P, __A); } void test_mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_store_epi64 - // CHECK: @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, i32 16, <2 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_store_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %__A, <2 x i64>* %0, i32 16, <2 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_store_epi64(__P, __U, __A); } void test_mm256_store_epi64(void *__P, __m256i __A) { - // CHECK-LABEL: @test_mm256_store_epi64 - // CHECK: store <4 x i64> %{{.*}}, <4 x i64>* %{{.*}} + // X64-LABEL: test_mm256_store_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: store <4 x i64> %__A, <4 x i64>* %0, align 32, !tbaa !2 + // X64-NEXT: ret void return _mm256_store_epi64(__P, __A); } void test_mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_store_epi64 - // CHECK: @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %{{.*}}, <4 x i64>* %{{.*}}, i32 32, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_store_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %__A, <4 x i64>* %0, i32 32, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm256_mask_store_epi64(__P, __U, __A); } __m128d test_mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_movedup_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_movedup_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <2 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %shuffle.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_movedup_pd(__W, __U, __A); } __m128d test_mm_maskz_movedup_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_movedup_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_movedup_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <2 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %shuffle.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_movedup_pd(__U, __A); } __m256d test_mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_movedup_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> 
%{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_movedup_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %__A, <4 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_movedup_pd(__W, __U, __A); } __m256d test_mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_movedup_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_movedup_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %__A, <4 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_movedup_pd(__U, __A); } __m128i test_mm_mask_set1_epi32(__m128i __O, __mmask8 __M) { - // CHECK-LABEL: @test_mm_mask_set1_epi32 - // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 - // CHECK: insertelement <4 x i32> %{{.*}}32 1 - // CHECK: insertelement <4 x i32> %{{.*}}32 2 - // CHECK: insertelement <4 x i32> %{{.*}}32 3 - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_set1_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> , <4 x i32> %0 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_set1_epi32(__O, __M, 5); } __m128i test_mm_maskz_set1_epi32(__mmask8 __M) { - // CHECK-LABEL: @test_mm_maskz_set1_epi32 - // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 - // CHECK: insertelement <4 x i32> %{{.*}}32 1 - // CHECK: insertelement <4 x i32> %{{.*}}32 2 - // CHECK: insertelement <4 x i32> %{{.*}}32 3 - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_set1_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i32> , <4 x i32> zeroinitializer + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_set1_epi32(__M, 5); } __m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) { - // CHECK-LABEL: @test_mm256_mask_set1_epi32 - // CHECK: insertelement <8 x i32> undef, i32 %{{.*}}, i32 0 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 2 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 3 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 4 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 6 - // CHECK: insertelement <8 
x i32> %{{.*}}, i32 %{{.*}}, i32 7 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_set1_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__O to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> , <8 x i32> %0 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_mask_set1_epi32(__O, __M, 5); } __m256i test_mm256_maskz_set1_epi32(__mmask8 __M) { - // CHECK-LABEL: @test_mm256_maskz_set1_epi32 - // CHECK: insertelement <8 x i32> undef, i32 %{{.*}}, i32 0 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 2 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 3 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 4 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 6 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_set1_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x i32> , <8 x i32> zeroinitializer + // X64-NEXT: %2 = bitcast <8 x i32> %1 to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_set1_epi32(__M, 5); } __m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm_mask_set1_epi64 - // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 - // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_set1_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i.i.i = insertelement <2 x i64> undef, i64 %__A, i32 0 + // X64-NEXT: %vecinit1.i.i.i = shufflevector <2 x i64> %vecinit.i.i.i, <2 x i64> undef, <2 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %vecinit1.i.i.i, <2 x i64> %__O + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_set1_epi64(__O, __M, __A); } __m128i test_mm_maskz_set1_epi64(__mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm_maskz_set1_epi64 - // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 - // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_set1_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i.i.i = insertelement <2 x i64> undef, i64 %__A, i32 0 + // X64-NEXT: %vecinit1.i.i.i = shufflevector <2 x i64> %vecinit.i.i.i, <2 x i64> undef, <2 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %vecinit1.i.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_set1_epi64(__M, __A); } __m256i test_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm256_mask_set1_epi64 - // CHECK: insertelement <4 x i64> undef, i64 %{{.*}}, i32 0 - // CHECK: 
insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 1 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3 - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_set1_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i.i.i = insertelement <4 x i64> undef, i64 %__A, i32 0 + // X64-NEXT: %vecinit3.i.i.i = shufflevector <4 x i64> %vecinit.i.i.i, <4 x i64> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %vecinit3.i.i.i, <4 x i64> %__O + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_set1_epi64(__O, __M, __A); } __m256i test_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm256_maskz_set1_epi64 - // CHECK: insertelement <4 x i64> undef, i64 %{{.*}}, i32 0 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 1 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3 - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_set1_epi64 + // X64: entry: + // X64-NEXT: %vecinit.i.i.i = insertelement <4 x i64> undef, i64 %__A, i32 0 + // X64-NEXT: %vecinit3.i.i.i = shufflevector <4 x i64> %vecinit.i.i.i, <4 x i64> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %vecinit3.i.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_set1_epi64(__M, __A); } __m128d test_mm_fixupimm_pd(__m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_pd - // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.128 + // X64-LABEL: test_mm_fixupimm_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 -1) + // X64-NEXT: ret <2 x double> %0 return _mm_fixupimm_pd(__A, __B, __C, 5); } __m128d test_mm_mask_fixupimm_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_pd - // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.128 + // X64-LABEL: test_mm_mask_fixupimm_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_fixupimm_pd(__A, __U, __B, __C, 5); } __m128d test_mm_maskz_fixupimm_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_pd - // CHECK: @llvm.x86.avx512.maskz.fixupimm.pd.128 + // X64-LABEL: test_mm_maskz_fixupimm_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %__A, <2 x double> %__B, <2 x i64> %__C, i32 5, i8 %__U) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_fixupimm_pd(__U, __A, __B, __C, 5); } __m256d test_mm256_fixupimm_pd(__m256d __A, __m256d __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_fixupimm_pd - // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.256 + // X64-LABEL: 
test_mm256_fixupimm_pd
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %__A, <4 x double> %__B, <4 x i64> %__C, i32 5, i8 -1)
+  // X64-NEXT: ret <4 x double> %0
   return _mm256_fixupimm_pd(__A, __B, __C, 5);
 }
 __m256d test_mm256_mask_fixupimm_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256i __C) {
-  // CHECK-LABEL: @test_mm256_mask_fixupimm_pd
-  // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.256
+  // X64-LABEL: test_mm256_mask_fixupimm_pd
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %__A, <4 x double> %__B, <4 x i64> %__C, i32 5, i8 %__U)
+  // X64-NEXT: ret <4 x double> %0
   return _mm256_mask_fixupimm_pd(__A, __U, __B, __C, 5);
 }
 __m256d test_mm256_maskz_fixupimm_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256i __C) {
-  // CHECK-LABEL: @test_mm256_maskz_fixupimm_pd
-  // CHECK: @llvm.x86.avx512.maskz.fixupimm.pd.256
+  // X64-LABEL: test_mm256_maskz_fixupimm_pd
+  // X64: entry:
+  // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %__A, <4 x double> %__B, <4 x i64> %__C, i32 5, i8 %__U)
+  // X64-NEXT: ret <4 x double> %0
   return _mm256_maskz_fixupimm_pd(__U, __A, __B, __C, 5);
 }
 __m128 test_mm_fixupimm_ps(__m128 __A, __m128 __B, __m128i __C) {
-  // CHECK-LABEL: @test_mm_fixupimm_ps
-  // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.128
+  // X64-LABEL: test_mm_fixupimm_ps
+  // X64: entry:
+  // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32>
+  // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 -1)
+  // X64-NEXT: ret <4 x float> %1
   return _mm_fixupimm_ps(__A, __B, __C, 5);
 }
 __m128 test_mm_mask_fixupimm_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128i __C) {
-  // CHECK-LABEL: @test_mm_mask_fixupimm_ps
-  // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.128
+  // X64-LABEL: test_mm_mask_fixupimm_ps
+  // X64: entry:
+  // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32>
+  // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U)
+  // X64-NEXT: ret <4 x float> %1
   return _mm_mask_fixupimm_ps(__A, __U, __B, __C, 5);
 }
 __m128 test_mm_maskz_fixupimm_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128i __C) {
-  // CHECK-LABEL: @test_mm_maskz_fixupimm_ps
-  // CHECK: @llvm.x86.avx512.maskz.fixupimm.ps.128
+  // X64-LABEL: test_mm_maskz_fixupimm_ps
+  // X64: entry:
+  // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32>
+  // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %__A, <4 x float> %__B, <4 x i32> %0, i32 5, i8 %__U)
+  // X64-NEXT: ret <4 x float> %1
   return _mm_maskz_fixupimm_ps(__U, __A, __B, __C, 5);
 }
 __m256 test_mm256_fixupimm_ps(__m256 __A, __m256 __B, __m256i __C) {
-  // CHECK-LABEL: @test_mm256_fixupimm_ps
-  // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.256
+  // X64-LABEL: test_mm256_fixupimm_ps
+  // X64: entry:
+  // X64-NEXT: %0 = bitcast <4 x i64> %__C to <8 x i32>
+  // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %__A, <8 x float> %__B, <8 x i32> %0, i32 5, i8 -1)
+  // X64-NEXT: ret <8 x float> %1
   return _mm256_fixupimm_ps(__A, __B, __C, 5);
 }
 __m256 test_mm256_mask_fixupimm_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256i __C) {
-  // CHECK-LABEL: @test_mm256_mask_fixupimm_ps
-  // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.256
+  // X64-LABEL: test_mm256_mask_fixupimm_ps
+  // X64: entry:
+
// X64-NEXT: %0 = bitcast <4 x i64> %__C to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %__A, <8 x float> %__B, <8 x i32> %0, i32 5, i8 %__U) + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_fixupimm_ps(__A, __U, __B, __C, 5); } __m256 test_mm256_maskz_fixupimm_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_maskz_fixupimm_ps - // CHECK: @llvm.x86.avx512.maskz.fixupimm.ps.256 + // X64-LABEL: test_mm256_maskz_fixupimm_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__C to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %__A, <8 x float> %__B, <8 x i32> %0, i32 5, i8 %__U) + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_fixupimm_ps(__U, __A, __B, __C, 5); } __m128d test_mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_load_pd - // CHECK: @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %{{.*}}, i32 16, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // X64-LABEL: test_mm_mask_load_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 16, <2 x i1> %extract.i, <2 x double> %__W) #9 + // X64-NEXT: ret <2 x double> %2 return _mm_mask_load_pd(__W, __U, __P); } __m128d test_mm_maskz_load_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_load_pd - // CHECK: @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %{{.*}}, i32 16, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // X64-LABEL: test_mm_maskz_load_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 16, <2 x i1> %extract.i, <2 x double> zeroinitializer) #9 + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_load_pd(__U, __P); } __m256d test_mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_load_pd - // CHECK: @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %{{.*}}, i32 32, <4 x i1> %{{.*}}, <4 x double> %{{.*}}) + // X64-LABEL: test_mm256_mask_load_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %0, i32 32, <4 x i1> %extract.i, <4 x double> %__W) #9 + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_load_pd(__W, __U, __P); } __m256d test_mm256_maskz_load_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_load_pd - // CHECK: @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %{{.*}}, i32 32, <4 x i1> %{{.*}}, <4 x double> %{{.*}}) + // X64-LABEL: test_mm256_maskz_load_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %0, i32 32, <4 x i1> %extract.i, <4 x double> zeroinitializer) #9 + // X64-NEXT: ret <4 x 
double> %2 return _mm256_maskz_load_pd(__U, __P); } __m128 test_mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_load_ps - // CHECK: @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %{{.*}}, i32 16, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // X64-LABEL: test_mm_mask_load_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 16, <4 x i1> %extract.i, <4 x float> %__W) #9 + // X64-NEXT: ret <4 x float> %2 return _mm_mask_load_ps(__W, __U, __P); } __m128 test_mm_maskz_load_ps(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_load_ps - // CHECK: @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %{{.*}}, i32 16, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // X64-LABEL: test_mm_maskz_load_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 16, <4 x i1> %extract.i, <4 x float> zeroinitializer) #9 + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_load_ps(__U, __P); } __m256 test_mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_load_ps - // CHECK: @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %{{.*}}, i32 32, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + // X64-LABEL: test_mm256_mask_load_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %0, i32 32, <8 x i1> %1, <8 x float> %__W) #9 + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_load_ps(__W, __U, __P); } __m256 test_mm256_maskz_load_ps(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_load_ps - // CHECK: @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %{{.*}}, i32 32, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + // X64-LABEL: test_mm256_maskz_load_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %0, i32 32, <8 x i1> %1, <8 x float> zeroinitializer) #9 + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_load_ps(__U, __P); } __m128i test_mm_loadu_epi64(void const *__P) { - // CHECK-LABEL: @test_mm_loadu_epi64 - // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm_loadu_epi64 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %0 = load <2 x i64>, <2 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <2 x i64> %0 return _mm_loadu_epi64(__P); } __m128i test_mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_loadu_epi64 - // CHECK: @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x i64> %{{.*}}) + // X64-LABEL: test_mm_mask_loadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* 
%0, i32 1, <2 x i1> %extract.i, <2 x i64> %__W) #9 + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_loadu_epi64(__W, __U, __P); } __m128i test_mm_maskz_loadu_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_loadu_epi64 - // CHECK: @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x i64> %{{.*}}) + // X64-LABEL: test_mm_maskz_loadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %0, i32 1, <2 x i1> %extract.i, <2 x i64> zeroinitializer) #9 + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_loadu_epi64(__U, __P); } __m256i test_mm256_loadu_epi64(void const *__P) { - // CHECK-LABEL: @test_mm256_loadu_epi64 - // CHECK: load <4 x i64>, <4 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm256_loadu_epi64 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %0 = load <4 x i64>, <4 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <4 x i64> %0 return _mm256_loadu_epi64(__P); } __m256i test_mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_loadu_epi64 - // CHECK: @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x i64> %{{.*}}) + // X64-LABEL: test_mm256_mask_loadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %0, i32 1, <4 x i1> %extract.i, <4 x i64> %__W) #9 + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_loadu_epi64(__W, __U, __P); } __m256i test_mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_loadu_epi64 - // CHECK: @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x i64> %{{.*}}) + // X64-LABEL: test_mm256_maskz_loadu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %0, i32 1, <4 x i1> %extract.i, <4 x i64> zeroinitializer) #9 + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_loadu_epi64(__U, __P); } __m128i test_mm_loadu_epi32(void const *__P) { - // CHECK-LABEL: @test_mm_loadu_epi32 - // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm_loadu_epi32 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %0 = load <2 x i64>, <2 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <2 x i64> %0 return _mm_loadu_epi32(__P); } __m128i test_mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_loadu_epi32 - // CHECK: @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x i32> %{{.*}}) + // X64-LABEL: test_mm_mask_loadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i32>* + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> 
@llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 1, <4 x i1> %extract.i, <4 x i32> %1) #9 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_loadu_epi32(__W, __U, __P); } __m128i test_mm_maskz_loadu_epi32(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_loadu_epi32 - // CHECK: @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x i32> %{{.*}}) + // X64-LABEL: test_mm_maskz_loadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i32>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 1, <4 x i1> %extract.i, <4 x i32> zeroinitializer) #9 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_loadu_epi32(__U, __P); } __m256i test_mm256_loadu_epi32(void const *__P) { - // CHECK-LABEL: @test_mm256_loadu_epi32 - // CHECK: load <4 x i64>, <4 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm256_loadu_epi32 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %0 = load <4 x i64>, <4 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret <4 x i64> %0 return _mm256_loadu_epi32(__P); } __m256i test_mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_loadu_epi32 - // CHECK: @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i32> %{{.*}}) + // X64-LABEL: test_mm256_mask_loadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i32>* + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %0, i32 1, <8 x i1> %2, <8 x i32> %1) #9 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_loadu_epi32(__W, __U, __P); } __m256i test_mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_loadu_epi32 - // CHECK: @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i32> %{{.*}}) + // X64-LABEL: test_mm256_maskz_loadu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i32>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %0, i32 1, <8 x i1> %1, <8 x i32> zeroinitializer) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_loadu_epi32(__U, __P); } __m128d test_mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_loadu_pd - // CHECK: @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // X64-LABEL: test_mm_mask_loadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> %__W) #9 + // X64-NEXT: ret <2 x double> %2 return _mm_mask_loadu_pd(__W, __U, __P); } __m128d test_mm_maskz_loadu_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_loadu_pd - // CHECK: 
@llvm.masked.load.v2f64.p0v2f64(<2 x double>* %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // X64-LABEL: test_mm_maskz_loadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> zeroinitializer) #9 + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_loadu_pd(__U, __P); } __m256d test_mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_loadu_pd - // CHECK: @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x double> %{{.*}}) + // X64-LABEL: test_mm256_mask_loadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %0, i32 1, <4 x i1> %extract.i, <4 x double> %__W) #9 + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_loadu_pd(__W, __U, __P); } __m256d test_mm256_maskz_loadu_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_loadu_pd - // CHECK: @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x double> %{{.*}}) + // X64-LABEL: test_mm256_maskz_loadu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %0, i32 1, <4 x i1> %extract.i, <4 x double> zeroinitializer) #9 + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_loadu_pd(__U, __P); } __m128 test_mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_loadu_ps - // CHECK: @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // X64-LABEL: test_mm_mask_loadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> %__W) #9 + // X64-NEXT: ret <4 x float> %2 return _mm_mask_loadu_ps(__W, __U, __P); } __m128 test_mm_maskz_loadu_ps(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_loadu_ps - // CHECK: @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // X64-LABEL: test_mm_maskz_loadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> zeroinitializer) #9 + // X64-NEXT: ret <4 x float> %2 return _mm_maskz_loadu_ps(__U, __P); } __m256 test_mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_loadu_ps - // CHECK: @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x float> 
%{{.*}}) + // X64-LABEL: test_mm256_mask_loadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %0, i32 1, <8 x i1> %1, <8 x float> %__W) #9 + // X64-NEXT: ret <8 x float> %2 return _mm256_mask_loadu_ps(__W, __U, __P); } __m256 test_mm256_maskz_loadu_ps(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_loadu_ps - // CHECK: @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + // X64-LABEL: test_mm256_maskz_loadu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = tail call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %0, i32 1, <8 x i1> %1, <8 x float> zeroinitializer) #9 + // X64-NEXT: ret <8 x float> %2 return _mm256_maskz_loadu_ps(__U, __P); } void test_mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_store_pd - // CHECK: @llvm.masked.store.v2f64.p0v2f64(<2 x double> %{{.*}}, <2 x double>* %{{.*}}, i32 16, <2 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_store_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %__A, <2 x double>* %0, i32 16, <2 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_store_pd(__P, __U, __A); } void test_mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_store_pd - // CHECK: @llvm.masked.store.v4f64.p0v4f64(<4 x double> %{{.*}}, <4 x double>* %{{.*}}, i32 32, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_store_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %__A, <4 x double>* %0, i32 32, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm256_mask_store_pd(__P, __U, __A); } void test_mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_store_ps - // CHECK: @llvm.masked.store.v4f32.p0v4f32(<4 x float> %{{.*}}, <4 x float>* %{{.*}}, i32 16, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_store_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %__A, <4 x float>* %0, i32 16, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_store_ps(__P, __U, __A); } void test_mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_store_ps - // CHECK: @llvm.masked.store.v8f32.p0v8f32(<8 x float> %{{.*}}, <8 x float>* %{{.*}}, i32 32, <8 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_store_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %__A, <8 x float>* %0, i32 32, <8 x i1> %1) #9 + // X64-NEXT: ret void return _mm256_mask_store_ps(__P, __U, __A); } void 
test_mm_storeu_epi64(void *__p, __m128i __a) { - // check-label: @test_mm_storeu_epi64 - // check: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm_storeu_epi64 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__p to <2 x i64>* + // X64-NEXT: store <2 x i64> %__a, <2 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void return _mm_storeu_epi64(__p, __a); } void test_mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_storeu_epi64 - // CHECK: @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, i32 1, <2 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_storeu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %__A, <2 x i64>* %0, i32 1, <2 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_storeu_epi64(__P, __U, __A); } void test_mm256_storeu_epi64(void *__P, __m256i __A) { - // CHECK-LABEL: @test_mm256_storeu_epi64 - // CHECK: store <4 x i64> %{{.*}}, <4 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm256_storeu_epi64 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: store <4 x i64> %__A, <4 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void return _mm256_storeu_epi64(__P, __A); } void test_mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_storeu_epi64 - // CHECK: @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %{{.*}}, <4 x i64>* %{{.*}}, i32 1, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_storeu_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i64>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %__A, <4 x i64>* %0, i32 1, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm256_mask_storeu_epi64(__P, __U, __A); } void test_mm_storeu_epi32(void *__P, __m128i __A) { - // CHECK-LABEL: @test_mm_storeu_epi32 - // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm_storeu_epi32 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <2 x i64>* + // X64-NEXT: store <2 x i64> %__A, <2 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void return _mm_storeu_epi32(__P, __A); } void test_mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_storeu_epi32 - // CHECK: @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, i32 1, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_storeu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x i32>* + // X64-NEXT: %1 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %0, i32 1, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_storeu_epi32(__P, __U, __A); } void test_mm256_storeu_epi32(void *__P, __m256i __A) { - // CHECK-LABEL: @test_mm256_storeu_epi32 - // CHECK: store <4 x i64> %{{.*}}, <4 x i64>* %{{.*}}, align 1{{$}} + // X64-LABEL: test_mm256_storeu_epi32 + // X64: entry: + // X64-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + 
// X64-NEXT: store <4 x i64> %__A, <4 x i64>* %__v.i, align 1, !tbaa !2 + // X64-NEXT: ret void return _mm256_storeu_epi32(__P, __A); } void test_mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_storeu_epi32 - // CHECK: @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %{{.*}}, <8 x i32>* %{{.*}}, i32 1, <8 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_storeu_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x i32>* + // X64-NEXT: %1 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %1, <8 x i32>* %0, i32 1, <8 x i1> %2) #9 + // X64-NEXT: ret void return _mm256_mask_storeu_epi32(__P, __U, __A); } void test_mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_storeu_pd - // CHECK: @llvm.masked.store.v2f64.p0v2f64(<2 x double> %{{.*}}, <2 x double>* %{{.*}}, i32 1, <2 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_storeu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <2 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %__A, <2 x double>* %0, i32 1, <2 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_storeu_pd(__P, __U, __A); } void test_mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_storeu_pd - // CHECK: @llvm.masked.store.v4f64.p0v4f64(<4 x double> %{{.*}}, <4 x double>* %{{.*}}, i32 1, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_storeu_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x double>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %__A, <4 x double>* %0, i32 1, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm256_mask_storeu_pd(__P, __U, __A); } void test_mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_storeu_ps - // CHECK: @llvm.masked.store.v4f32.p0v4f32(<4 x float> %{{.*}}, <4 x float>* %{{.*}}, i32 1, <4 x i1> %{{.*}}) + // X64-LABEL: test_mm_mask_storeu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <4 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %__A, <4 x float>* %0, i32 1, <4 x i1> %extract.i) #9 + // X64-NEXT: ret void return _mm_mask_storeu_ps(__P, __U, __A); } void test_mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_storeu_ps - // CHECK: @llvm.masked.store.v8f32.p0v8f32(<8 x float> %{{.*}}, <8 x float>* %{{.*}}, i32 1, <8 x i1> %{{.*}}) + // X64-LABEL: test_mm256_mask_storeu_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8* %__P to <8 x float>* + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %__A, <8 x float>* %0, i32 1, <8 x i1> %1) #9 + // X64-NEXT: ret void return _mm256_mask_storeu_ps(__P, __U, __A); } __m128d test_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_unpackhi_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x 
i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_unpackhi_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %shuffle.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_unpackhi_pd(__W, __U, __A, __B); } __m128d test_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_unpackhi_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_unpackhi_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %shuffle.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_unpackhi_pd(__U, __A, __B); } __m256d test_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_unpackhi_pd - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_unpackhi_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_unpackhi_pd(__W, __U, __A, __B); } __m256d test_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_unpackhi_pd - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpackhi_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_unpackhi_pd(__U, __A, __B); } __m128 test_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_unpackhi_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}} <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_unpackhi_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_unpackhi_ps(__W, __U, __A, __B); } __m128 
test_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_unpackhi_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}} <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_unpackhi_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_unpackhi_ps(__U, __A, __B); } __m256 test_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_unpackhi_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_unpackhi_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_unpackhi_ps(__W, __U, __A, __B); } __m256 test_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_unpackhi_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpackhi_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_unpackhi_ps(__U, __A, __B); } __m128d test_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_unpacklo_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_unpacklo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %shuffle.i.i, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_unpacklo_pd(__W, __U, __A, __B); } __m128d test_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_unpacklo_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_unpacklo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %shuffle.i.i, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_unpacklo_pd(__U, __A, __B); } __m256d 
test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_unpacklo_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_unpacklo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_unpacklo_pd(__W, __U, __A, __B); } __m256d test_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_unpacklo_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpacklo_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_unpacklo_pd(__U, __A, __B); } __m128 test_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_unpacklo_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_unpacklo_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_unpacklo_ps(__W, __U, __A, __B); } __m128 test_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_unpacklo_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_unpacklo_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_unpacklo_ps(__U, __A, __B); } __m256 test_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_unpacklo_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_unpacklo_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> %__W + 
// X64-NEXT: ret <8 x float> %1 return _mm256_mask_unpacklo_ps(__W, __U, __A, __B); } __m256 test_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_unpacklo_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpacklo_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_unpacklo_ps(__U, __A, __B); } __m128d test_mm_rcp14_pd(__m128d __A) { - // CHECK-LABEL: @test_mm_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.128 + // X64-LABEL: test_mm_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %__A, <2 x double> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_rcp14_pd(__A); } __m128d test_mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.128 + // X64-LABEL: test_mm_mask_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %__A, <2 x double> %__W, i8 %__U) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_rcp14_pd(__W, __U, __A); } __m128d test_mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.128 + // X64-LABEL: test_mm_maskz_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %__A, <2 x double> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_rcp14_pd(__U, __A); } __m256d test_mm256_rcp14_pd(__m256d __A) { - // CHECK-LABEL: @test_mm256_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.256 + // X64-LABEL: test_mm256_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %__A, <4 x double> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_rcp14_pd(__A); } __m256d test_mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.256 + // X64-LABEL: test_mm256_mask_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %__A, <4 x double> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_mask_rcp14_pd(__W, __U, __A); } __m256d test_mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_rcp14_pd - // CHECK: @llvm.x86.avx512.rcp14.pd.256 + // X64-LABEL: test_mm256_maskz_rcp14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %__A, <4 x double> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_maskz_rcp14_pd(__U, __A); } __m128 test_mm_rcp14_ps(__m128 __A) { - // CHECK-LABEL: @test_mm_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.128 + // X64-LABEL: test_mm_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %__A, <4 x float> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_rcp14_ps(__A); } __m128 test_mm_mask_rcp14_ps(__m128 __W, __mmask8 
__U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.128 + // X64-LABEL: test_mm_mask_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %__A, <4 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_rcp14_ps(__W, __U, __A); } __m128 test_mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.128 + // X64-LABEL: test_mm_maskz_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %__A, <4 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_rcp14_ps(__U, __A); } __m256 test_mm256_rcp14_ps(__m256 __A) { - // CHECK-LABEL: @test_mm256_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.256 + // X64-LABEL: test_mm256_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %__A, <8 x float> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_rcp14_ps(__A); } __m256 test_mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.256 + // X64-LABEL: test_mm256_mask_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %__A, <8 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_mask_rcp14_ps(__W, __U, __A); } __m256 test_mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_rcp14_ps - // CHECK: @llvm.x86.avx512.rcp14.ps.256 + // X64-LABEL: test_mm256_maskz_rcp14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %__A, <8 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_maskz_rcp14_ps(__U, __A); } __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) { - // CHECK-LABEL: @test_mm_mask_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_permute_pd + // X64: entry: + // X64-NEXT: %permil = shufflevector <2 x double> %__X, <2 x double> undef, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract, <2 x double> %permil, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_permute_pd(__W, __U, __X, 1); } __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) { - // CHECK-LABEL: @test_mm_maskz_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_permute_pd + // X64: entry: + // X64-NEXT: %permil = shufflevector <2 x double> %__X, <2 x double> undef, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract, <2 x double> %permil, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_permute_pd(__U, __X, 1); } __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) { - // CHECK-LABEL: @test_mm256_mask_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, 
<4 x double> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_permute_pd + // X64: entry: + // X64-NEXT: %permil = shufflevector <4 x double> %__X, <4 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %permil, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_permute_pd(__W, __U, __X, 5); } __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) { - // CHECK-LABEL: @test_mm256_maskz_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_permute_pd + // X64: entry: + // X64-NEXT: %permil = shufflevector <4 x double> %__X, <4 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %permil, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_permute_pd(__U, __X, 5); } __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) { - // CHECK-LABEL: @test_mm_mask_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_permute_ps + // X64: entry: + // X64-NEXT: %permil = shufflevector <4 x float> %__X, <4 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x float> %permil, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_permute_ps(__W, __U, __X, 0x1b); } __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) { - // CHECK-LABEL: @test_mm_maskz_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_permute_ps + // X64: entry: + // X64-NEXT: %permil = shufflevector <4 x float> %__X, <4 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x float> %permil, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_permute_ps(__U, __X, 0x1b); } __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) { - // CHECK-LABEL: @test_mm256_mask_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_permute_ps + // X64: entry: + // X64-NEXT: %permil = shufflevector <8 x float> %__X, <8 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %permil, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_permute_ps(__W, __U, __X, 0x1b); } __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) { - // CHECK-LABEL: @test_mm256_maskz_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 
x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_permute_ps + // X64: entry: + // X64-NEXT: %permil = shufflevector <8 x float> %__X, <8 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %permil, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_permute_ps(__U, __X, 0x1b); } __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_permutevar_pd - // CHECK: @llvm.x86.avx.vpermilvar.pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_permutevar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %__A, <2 x i64> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__W + // X64-NEXT: ret <2 x double> %2 return _mm_mask_permutevar_pd(__W, __U, __A, __C); } __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_permutevar_pd - // CHECK: @llvm.x86.avx.vpermilvar.pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_permutevar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %__A, <2 x i64> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %2 return _mm_maskz_permutevar_pd(__U, __A, __C); } __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { - // CHECK-LABEL: @test_mm256_mask_permutevar_pd - // CHECK: @llvm.x86.avx.vpermilvar.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_permutevar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %__A, <4 x i64> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_permutevar_pd(__W, __U, __A, __C); } __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { - // CHECK-LABEL: @test_mm256_maskz_permutevar_pd - // CHECK: @llvm.x86.avx.vpermilvar.pd.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutevar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %__A, <4 x i64> %__C) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_permutevar_pd(__U, __A, __C); } __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_permutevar_ps - // CHECK: @llvm.x86.avx.vpermilvar.ps - // CHECK: select <4 x i1> %{{.*}}, <4 x 
float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_permutevar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %__A, <4 x i32> %0) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x float> %1, <4 x float> %__W + // X64-NEXT: ret <4 x float> %3 return _mm_mask_permutevar_ps(__W, __U, __A, __C); } __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_permutevar_ps - // CHECK: @llvm.x86.avx.vpermilvar.ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_permutevar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %__A, <4 x i32> %0) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x float> %1, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %3 return _mm_maskz_permutevar_ps(__U, __A, __C); } __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { - // CHECK-LABEL: @test_mm256_mask_permutevar_ps - // CHECK: @llvm.x86.avx.vpermilvar.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_permutevar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__C to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %__A, <8 x i32> %0) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %__W + // X64-NEXT: ret <8 x float> %3 return _mm256_mask_permutevar_ps(__W, __U, __A, __C); } __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { - // CHECK-LABEL: @test_mm256_maskz_permutevar_ps - // CHECK: @llvm.x86.avx.vpermilvar.ps.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutevar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__C to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %__A, <8 x i32> %0) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %3 return _mm256_maskz_permutevar_ps(__U, __A, __C); } __mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_test_epi32_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_test_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + // X64-NEXT: %1 = icmp ne <4 x i32> %0, zeroinitializer + // X64-NEXT: %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm_test_epi32_mask(__A, __B); } __mmask8 test_mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_test_epi32_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} - 
// CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_test_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + // X64-NEXT: %1 = icmp ne <4 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = and <4 x i1> %1, %extract.i + // X64-NEXT: %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return _mm_mask_test_epi32_mask(__U, __A, __B); } __mmask8 test_mm256_test_epi32_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_test_epi32_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_test_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + // X64-NEXT: %1 = icmp ne <8 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_test_epi32_mask(__A, __B); } __mmask8 test_mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_test_epi32_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_test_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + // X64-NEXT: %1 = icmp ne <8 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = and <8 x i1> %1, %2 + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_test_epi32_mask(__U, __A, __B); } __mmask8 test_mm_test_epi64_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_test_epi64_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_test_epi64_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_test_epi64_mask(__A, __B); } __mmask8 test_mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_test_epi64_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_test_epi64_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract.i + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_test_epi64_mask(__U, __A, __B); } __mmask8 test_mm256_test_epi64_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_test_epi64_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_test_epi64_mask + // X64: entry: + // 
X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_test_epi64_mask(__A, __B); } __mmask8 test_mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_test_epi64_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_test_epi64_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract.i + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_test_epi64_mask(__U, __A, __B); } __mmask8 test_mm_testn_epi32_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_testn_epi32_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_testn_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + // X64-NEXT: %1 = icmp eq <4 x i32> %0, zeroinitializer + // X64-NEXT: %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return _mm_testn_epi32_mask(__A, __B); } __mmask8 test_mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_testn_epi32_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_testn_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + // X64-NEXT: %1 = icmp eq <4 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = and <4 x i1> %1, %extract.i + // X64-NEXT: %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return _mm_mask_testn_epi32_mask(__U, __A, __B); } __mmask8 test_mm256_testn_epi32_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_testn_epi32_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_testn_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + // X64-NEXT: %1 = icmp eq <8 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_testn_epi32_mask(__A, __B); } __mmask8 test_mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_testn_epi32_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_testn_epi32_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> 
%__B, %__A + // X64-NEXT: %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + // X64-NEXT: %1 = icmp eq <8 x i32> %0, zeroinitializer + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = and <8 x i1> %1, %2 + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_testn_epi32_mask(__U, __A, __B); } __mmask8 test_mm_testn_epi64_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_testn_epi64_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_testn_epi64_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm_testn_epi64_mask(__A, __B); } __mmask8 test_mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_testn_epi64_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_testn_epi64_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // X64-NEXT: %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract.i + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm_mask_testn_epi64_mask(__U, __A, __B); } __mmask8 test_mm256_testn_epi64_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_testn_epi64_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_testn_epi64_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return _mm256_testn_epi64_mask(__A, __B); } __mmask8 test_mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_testn_epi64_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_testn_epi64_mask + // X64: entry: + // X64-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // X64-NEXT: %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract.i + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return _mm256_mask_testn_epi64_mask(__U, __A, __B); } __m128i test_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpackhi_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_unpackhi_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // 
X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %shuffle.i.i, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_unpackhi_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_unpackhi_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_unpackhi_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %shuffle.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_unpackhi_epi32(__U, __A, __B); } __m256i test_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpackhi_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_unpackhi_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %shuffle.i.i, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_unpackhi_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpackhi_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %shuffle.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_unpackhi_epi32(__U, __A, __B); } __m128i test_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpackhi_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_unpackhi_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> %__B, <2 
x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %shuffle.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_unpackhi_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_unpackhi_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_unpackhi_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %shuffle.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_unpackhi_epi64(__U, __A, __B); } __m256i test_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpackhi_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_unpackhi_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i64> %__A, <4 x i64> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %shuffle.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_unpackhi_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpackhi_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i64> %__A, <4 x i64> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %shuffle.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_unpackhi_epi64(__U, __A, __B); } __m128i test_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpacklo_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_unpacklo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %shuffle.i.i, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_unpacklo_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i 
__B) { - // CHECK-LABEL: @test_mm_maskz_unpacklo_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_unpacklo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %shuffle.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_unpacklo_epi32(__U, __A, __B); } __m256i test_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpacklo_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_unpacklo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %shuffle.i.i, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_unpacklo_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpacklo_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %shuffle.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_unpacklo_epi32(__U, __A, __B); } __m128i test_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpacklo_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_unpacklo_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %shuffle.i.i, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_unpacklo_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_unpacklo_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: 
test_mm_maskz_unpacklo_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %shuffle.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_unpacklo_epi64(__U, __A, __B); } __m256i test_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpacklo_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_unpacklo_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i64> %__A, <4 x i64> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %shuffle.i.i, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_unpacklo_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_unpacklo_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i64> %__A, <4 x i64> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %shuffle.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_unpacklo_epi64(__U, __A, __B); } __m128i test_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_sra_epi32 - // CHECK: @llvm.x86.sse2.psra.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_sra_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_sra_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_sra_epi32 - // CHECK: @llvm.x86.sse2.psra.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_sra_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // X64-NEXT: %5 = 
bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_maskz_sra_epi32(__U, __A, __B); } __m256i test_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_sra_epi32 - // CHECK: @llvm.x86.avx2.psra.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_sra_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_sra_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_sra_epi32 - // CHECK: @llvm.x86.avx2.psra.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_sra_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %0, <4 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_sra_epi32(__U, __A, __B); } __m128i test_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_srai_epi32 - // CHECK: @llvm.x86.sse2.psrai.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_srai_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = ashr <4 x i32> %0, + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_srai_epi32(__W, __U, __A, 5); } __m128i test_mm_mask_srai_epi32_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_srai_epi32_2 - // CHECK: @llvm.x86.sse2.psrai.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_srai_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_srai_epi32(__W, __U, __A, __B); } __m128i test_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_srai_epi32 - // CHECK: @llvm.x86.sse2.psrai.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_srai_epi32 + 
// X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = ashr <4 x i32> %0, + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_srai_epi32(__U, __A, 5); } __m128i test_mm_maskz_srai_epi32_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_srai_epi32_2 - // CHECK: @llvm.x86.sse2.psrai.d - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_srai_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %1, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_srai_epi32(__U, __A, __B); } __m256i test_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_srai_epi32 - // CHECK: @llvm.x86.avx2.psrai.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_srai_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = ashr <8 x i32> %0, + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_srai_epi32(__W, __U, __A, 5); } __m256i test_mm256_mask_srai_epi32_2(__m256i __W, __mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_srai_epi32_2 - // CHECK: @llvm.x86.avx2.psrai.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_srai_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_srai_epi32(__W, __U, __A, __B); } __m256i test_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_srai_epi32 - // CHECK: @llvm.x86.avx2.psrai.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_srai_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = ashr <8 x i32> %0, + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_srai_epi32(__U, __A, 5); } __m256i test_mm256_maskz_srai_epi32_2(__mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_maskz_srai_epi32_2 - // CHECK: @llvm.x86.avx2.psrai.d - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 
x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_srai_epi32_2 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %0, i32 %__B) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_srai_epi32(__U, __A, __B); } __m128i test_mm_sra_epi64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.128 + // X64-LABEL: test_mm_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: ret <2 x i64> %0 return _mm_sra_epi64(__A, __B); } __m128i test_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_sra_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_sra_epi64(__U, __A, __B); } __m256i test_mm256_sra_epi64(__m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.256 + // X64-LABEL: test_mm256_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: ret <4 x i64> %0 return _mm256_sra_epi64(__A, __B); } __m256i test_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_sra_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_sra_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_sra_epi64 - // CHECK: @llvm.x86.avx512.psra.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_sra_epi64 
+ // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %__A, <2 x i64> %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_sra_epi64(__U, __A, __B); } __m128i test_mm_srai_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm_srai_epi64 - // CHECK: @llvm.x86.avx512.psrai.q.128 + // X64-LABEL: test_mm_srai_epi64 + // X64: entry: + // X64-NEXT: %0 = ashr <2 x i64> %__A, + // X64-NEXT: ret <2 x i64> %0 return _mm_srai_epi64(__A, 5); } __m128i test_mm_srai_epi64_2(__m128i __A, int __B) { - // CHECK-LABEL: @test_mm_srai_epi64_2 - // CHECK: @llvm.x86.avx512.psrai.q.128 + // X64-LABEL: test_mm_srai_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %__A, i32 %__B) #9 + // X64-NEXT: ret <2 x i64> %0 return _mm_srai_epi64(__A, __B); } __m128i test_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_srai_epi64 - // CHECK: @llvm.x86.avx512.psrai.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_srai_epi64 + // X64: entry: + // X64-NEXT: %0 = ashr <2 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_srai_epi64(__W, __U, __A, 5); } __m128i test_mm_mask_srai_epi64_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_srai_epi64_2 - // CHECK: @llvm.x86.avx512.psrai.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_srai_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_srai_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_srai_epi64 - // CHECK: @llvm.x86.avx512.psrai.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_srai_epi64 + // X64: entry: + // X64-NEXT: %0 = ashr <2 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_srai_epi64(__U, __A, 5); } __m128i test_mm_maskz_srai_epi64_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_srai_epi64_2 - // CHECK: @llvm.x86.avx512.psrai.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_srai_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 
x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_srai_epi64(__U, __A, __B); } __m256i test_mm256_srai_epi64(__m256i __A) { - // CHECK-LABEL: @test_mm256_srai_epi64 - // CHECK: @llvm.x86.avx512.psrai.q.256 + // X64-LABEL: test_mm256_srai_epi64 + // X64: entry: + // X64-NEXT: %0 = ashr <4 x i64> %__A, + // X64-NEXT: ret <4 x i64> %0 return _mm256_srai_epi64(__A, 5); } __m256i test_mm256_srai_epi64_2(__m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_srai_epi64_2 - // CHECK: @llvm.x86.avx512.psrai.q.256 + // X64-LABEL: test_mm256_srai_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %__A, i32 %__B) #9 + // X64-NEXT: ret <4 x i64> %0 return _mm256_srai_epi64(__A, __B); } __m256i test_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_srai_epi64 - // CHECK: @llvm.x86.avx512.psrai.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_srai_epi64 + // X64: entry: + // X64-NEXT: %0 = ashr <4 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_srai_epi64(__W, __U, __A, 5); } __m256i test_mm256_mask_srai_epi64_2(__m256i __W, __mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_srai_epi64_2 - // CHECK: @llvm.x86.avx512.psrai.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_srai_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_srai_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_srai_epi64 - // CHECK: @llvm.x86.avx512.psrai.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_srai_epi64 + // X64: entry: + // X64-NEXT: %0 = ashr <4 x i64> %__A, + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_srai_epi64(__U, __A, 5); } __m256i test_mm256_maskz_srai_epi64_2(__mmask8 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_maskz_srai_epi64_2 - // CHECK: @llvm.x86.avx512.psrai.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_srai_epi64_2 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %__A, i32 %__B) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_srai_epi64(__U, __A, __B); } __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) { - // CHECK-LABEL: @test_mm_ternarylogic_epi32 - // CHECK: 
@llvm.x86.avx512.pternlog.d.128 + // X64-LABEL: test_mm_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_ternarylogic_epi32(__A, __B, __C, 4); } __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> %0 + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.128 - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer + // X64-LABEL: test_mm_maskz_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__C to <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // X64-NEXT: %5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // X64-NEXT: ret <2 x i64> %6 return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.256 + // X64-LABEL: test_mm256_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__C to <8 x i32> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_ternarylogic_epi32(__A, __B, __C, 4); } __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_mask_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = 
bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__C to <8 x i32> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_maskz_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.256 - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer + // X64-LABEL: test_mm256_maskz_ternarylogic_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__C to <8 x i32> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4) + // X64-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) { - // CHECK-LABEL: @test_mm_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.128 + // X64-LABEL: test_mm_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4) + // X64-NEXT: ret <2 x i64> %0 return _mm_ternarylogic_epi64(__A, __B, __C, 4); } __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__A + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.128 - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> zeroinitializer + // X64-LABEL: test_mm_maskz_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } __m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.256 + // X64-LABEL: 
test_mm256_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4) + // X64-NEXT: ret <4 x i64> %0 return _mm256_ternarylogic_epi64(__A, __B, __C, 4); } __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_mask_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__A + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { - // CHECK-LABEL: @test_mm256_maskz_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> zeroinitializer + // X64-LABEL: test_mm256_maskz_ternarylogic_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4) + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_shuffle_f32x4 - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_shuffle_f32x4 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: ret <8 x float> %shuf return _mm256_shuffle_f32x4(__A, __B, 3); } __m256 test_mm256_mask_shuffle_f32x4(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_shuffle_f32x4 - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_shuffle_f32x4 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuf, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_shuffle_f32x4(__W, __U, __A, __B, 3); } __m256 test_mm256_maskz_shuffle_f32x4(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_shuffle_f32x4 - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_shuffle_f32x4 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuf, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_shuffle_f32x4(__U, __A, __B, 3); } __m256d 
test_mm256_shuffle_f64x2(__m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_shuffle_f64x2 - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // X64-LABEL: test_mm256_shuffle_f64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: ret <4 x double> %shuf return _mm256_shuffle_f64x2(__A, __B, 3); } __m256d test_mm256_mask_shuffle_f64x2(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_shuffle_f64x2 - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_shuffle_f64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %shuf, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_shuffle_f64x2(__W, __U, __A, __B, 3); } __m256d test_mm256_maskz_shuffle_f64x2(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_shuffle_f64x2 - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_shuffle_f64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %shuf, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_shuffle_f64x2(__U, __A, __B, 3); } __m256i test_mm256_shuffle_i32x4(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_shuffle_i32x4 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_shuffle_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %shuf = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i32> %shuf to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_shuffle_i32x4(__A, __B, 3); } __m256i test_mm256_mask_shuffle_i32x4(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_shuffle_i32x4 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_shuffle_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %shuf = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %shuf, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_shuffle_i32x4(__W, __U, __A, __B, 3); } __m256i test_mm256_maskz_shuffle_i32x4(__mmask8 __U, __m256i __A, __m256i __B) { - // 
CHECK-LABEL: @test_mm256_maskz_shuffle_i32x4 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_shuffle_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %shuf = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %shuf, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_shuffle_i32x4(__U, __A, __B, 3); } __m256i test_mm256_shuffle_i64x2(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_shuffle_i64x2 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> + // X64-LABEL: test_mm256_shuffle_i64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <4 x i64> %__A, <4 x i64> %__B, <4 x i32> + // X64-NEXT: ret <4 x i64> %shuf return _mm256_shuffle_i64x2(__A, __B, 3); } __m256i test_mm256_mask_shuffle_i64x2(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_shuffle_i64x2 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_shuffle_i64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <4 x i64> %__A, <4 x i64> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x i64> %shuf, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_shuffle_i64x2(__W, __U, __A, __B, 3); } __m256i test_mm256_maskz_shuffle_i64x2(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_shuffle_i64x2 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_shuffle_i64x2 + // X64: entry: + // X64-NEXT: %shuf = shufflevector <4 x i64> %__A, <4 x i64> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x i64> %shuf, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_shuffle_i64x2(__U, __A, __B, 3); } __m128d test_mm_mask_shuffle_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_shuffle_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_shuffle_pd + // X64: entry: + // X64-NEXT: %shufp = shufflevector <2 x double> %__A, <2 x double> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract, <2 x double> %shufp, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_shuffle_pd(__W, __U, __A, __B, 3); } __m128d test_mm_maskz_shuffle_pd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: 
@test_mm_maskz_shuffle_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_shuffle_pd + // X64: entry: + // X64-NEXT: %shufp = shufflevector <2 x double> %__A, <2 x double> %__B, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract, <2 x double> %shufp, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_shuffle_pd(__U, __A, __B, 3); } __m256d test_mm256_mask_shuffle_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_mask_shuffle_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_shuffle_pd + // X64: entry: + // X64-NEXT: %shufp = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %shufp, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_shuffle_pd(__W, __U, __A, __B, 3); } __m256d test_mm256_maskz_shuffle_pd(__mmask8 __U, __m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_maskz_shuffle_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_shuffle_pd + // X64: entry: + // X64-NEXT: %shufp = shufflevector <4 x double> %__A, <4 x double> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %shufp, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_shuffle_pd(__U, __A, __B, 3); } __m128 test_mm_mask_shuffle_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_shuffle_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_shuffle_ps + // X64: entry: + // X64-NEXT: %shufp = shufflevector <4 x float> %__A, <4 x float> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x float> %shufp, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_shuffle_ps(__W, __U, __A, __B, 4); } __m128 test_mm_maskz_shuffle_ps(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_shuffle_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_shuffle_ps + // X64: entry: + // X64-NEXT: %shufp = shufflevector <4 x float> %__A, <4 x float> %__B, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x float> %shufp, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_shuffle_ps(__U, __A, 
__B, 4); } __m256 test_mm256_mask_shuffle_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_shuffle_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_shuffle_ps + // X64: entry: + // X64-NEXT: %shufp = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shufp, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_shuffle_ps(__W, __U, __A, __B, 4); } __m256 test_mm256_maskz_shuffle_ps(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_shuffle_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_shuffle_ps + // X64: entry: + // X64-NEXT: %shufp = shufflevector <8 x float> %__A, <8 x float> %__B, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shufp, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_shuffle_ps(__U, __A, __B, 4); } __m128d test_mm_rsqrt14_pd(__m128d __A) { - // CHECK-LABEL: @test_mm_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.128 + // X64-LABEL: test_mm_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %__A, <2 x double> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_rsqrt14_pd(__A); } __m128d test_mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.128 + // X64-LABEL: test_mm_mask_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %__A, <2 x double> %__W, i8 %__U) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_mask_rsqrt14_pd(__W, __U, __A); } __m128d test_mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.128 + // X64-LABEL: test_mm_maskz_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %__A, <2 x double> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_rsqrt14_pd(__U, __A); } __m256d test_mm256_rsqrt14_pd(__m256d __A) { - // CHECK-LABEL: @test_mm256_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.256 + // X64-LABEL: test_mm256_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %__A, <4 x double> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_rsqrt14_pd(__A); } __m256d test_mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.256 + // X64-LABEL: test_mm256_mask_rsqrt14_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %__A, <4 x double> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_mask_rsqrt14_pd(__W, __U, __A); } __m256d test_mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_rsqrt14_pd - // CHECK: @llvm.x86.avx512.rsqrt14.pd.256 + // X64-LABEL: test_mm256_maskz_rsqrt14_pd + // X64: 
entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %__A, <4 x double> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_maskz_rsqrt14_pd(__U, __A); } __m128 test_mm_rsqrt14_ps(__m128 __A) { - // CHECK-LABEL: @test_mm_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.128 + // X64-LABEL: test_mm_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %__A, <4 x float> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_rsqrt14_ps(__A); } __m128 test_mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.128 + // X64-LABEL: test_mm_mask_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %__A, <4 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_mask_rsqrt14_ps(__W, __U, __A); } __m128 test_mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.128 + // X64-LABEL: test_mm_maskz_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %__A, <4 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %0 return _mm_maskz_rsqrt14_ps(__U, __A); } __m256 test_mm256_rsqrt14_ps(__m256 __A) { - // CHECK-LABEL: @test_mm256_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.256 + // X64-LABEL: test_mm256_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %__A, <8 x float> zeroinitializer, i8 -1) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_rsqrt14_ps(__A); } __m256 test_mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.256 + // X64-LABEL: test_mm256_mask_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %__A, <8 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_mask_rsqrt14_ps(__W, __U, __A); } __m256 test_mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_rsqrt14_ps - // CHECK: @llvm.x86.avx512.rsqrt14.ps.256 + // X64-LABEL: test_mm256_maskz_rsqrt14_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %__A, <8 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %0 return _mm256_maskz_rsqrt14_ps(__U, __A); } __m256 test_mm256_broadcast_f32x4(__m128 __A) { - // CHECK-LABEL: @test_mm256_broadcast_f32x4 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_broadcast_f32x4 + // X64: entry: + // X64-NEXT: %shuffle.i = shufflevector <4 x float> %__A, <4 x float> undef, <8 x i32> + // X64-NEXT: ret <8 x float> %shuffle.i return _mm256_broadcast_f32x4(__A); } __m256 test_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { - // CHECK-LABEL: @test_mm256_mask_broadcast_f32x4 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_broadcast_f32x4 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // 
X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> %__O + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_broadcast_f32x4(__O, __M, __A); } __m256 test_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x4 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_broadcast_f32x4 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_broadcast_f32x4(__M, __A); } __m256i test_mm256_broadcast_i32x4(__m128i const* __A) { - // CHECK-LABEL: @test_mm256_broadcast_i32x4 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_broadcast_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // X64-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i32> %shuffle.i to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_broadcast_i32x4(_mm_loadu_si128(__A)); } __m256i test_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i const* __A) { - // CHECK-LABEL: @test_mm256_mask_broadcast_i32x4 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_broadcast_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // X64-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__O to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %shuffle.i.i, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_broadcast_i32x4(__O, __M, _mm_loadu_si128(__A)); } __m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i const* __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x4 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_broadcast_i32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64>* %__A to <4 x i32>* + // X64-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1, !tbaa !2 + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %shuffle.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); } __m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { - // CHECK-LABEL: @test_mm256_mask_broadcastsd_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_broadcastsd_pd + // X64: entry: + // X64-NEXT: 
%shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> %__O + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_broadcastsd_pd(__O, __M, __A); } __m256d test_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcastsd_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_broadcastsd_pd + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %shuffle.i.i, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_broadcastsd_pd(__M, __A); } __m128 test_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_broadcastss_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_broadcastss_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> %__O + // X64-NEXT: ret <4 x float> %1 return _mm_mask_broadcastss_ps(__O, __M, __A); } __m128 test_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_broadcastss_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_broadcastss_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_broadcastss_ps(__M, __A); } __m256 test_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { - // CHECK-LABEL: @test_mm256_mask_broadcastss_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_broadcastss_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> %__O + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_broadcastss_ps(__O, __M, __A); } __m256 test_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcastss_ps - // CHECK: shufflevector <4 x 
float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_broadcastss_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <8 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_broadcastss_ps(__M, __A); } __m128i test_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_broadcastd_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_broadcastd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract.i, <4 x i32> %shuffle.i.i, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_broadcastd_epi32(__O, __M, __A); } __m128i test_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_broadcastd_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_broadcastd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %shuffle.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_broadcastd_epi32(__M, __A); } __m256i test_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_broadcastd_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_broadcastd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <8 x i32> zeroinitializer + // X64-NEXT: %1 = bitcast <4 x i64> %__O to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %shuffle.i.i, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_broadcastd_epi32(__O, __M, __A); } __m256i test_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcastd_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_broadcastd_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x 
i64> %__A to <4 x i32> + // X64-NEXT: %shuffle.i.i = shufflevector <4 x i32> %0, <4 x i32> undef, <8 x i32> zeroinitializer + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %shuffle.i.i, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_broadcastd_epi32(__M, __A); } __m128i test_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_broadcastq_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_broadcastq_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <2 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %shuffle.i.i, <2 x i64> %__O + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_broadcastq_epi64(__O, __M, __A); } __m128i test_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_broadcastq_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_broadcastq_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <2 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x i64> %shuffle.i.i, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_broadcastq_epi64(__M, __A); } __m256i test_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_broadcastq_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_broadcastq_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %shuffle.i.i, <4 x i64> %__O + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_broadcastq_epi64(__O, __M, __A); } __m256i test_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcastq_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_broadcastq_epi64 + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <2 x i64> %__A, <2 x i64> undef, <4 x i32> zeroinitializer + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i64> %shuffle.i.i, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_broadcastq_epi64(__M, __A); } __m128i test_mm_cvtsepi32_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtsepi32_epi8 - // CHECK: 
@llvm.x86.avx512.mask.pmovs.db.128 + // X64-LABEL: test_mm_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %0, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_cvtsepi32_epi8(__A); } __m128i test_mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.128 + // X64-LABEL: test_mm_mask_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %0, <16 x i8> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_cvtsepi32_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.128 + // X64-LABEL: test_mm_maskz_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %0, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtsepi32_epi8(__M, __A); } void test_mm_mask_cvtsepi32_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.mem.128 + // X64-LABEL: test_mm_mask_cvtsepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %__P, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtsepi32_storeu_epi8(__P, __M, __A); } __m128i test_mm256_cvtsepi32_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.256 + // X64-LABEL: test_mm256_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %0, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_cvtsepi32_epi8(__A); } __m128i test_mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.256 + // X64-LABEL: test_mm256_mask_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %0, <16 x i8> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtsepi32_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtsepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.256 + // X64-LABEL: test_mm256_maskz_cvtsepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %0, <16 x i8> zeroinitializer, i8 %__M) #9 + // 
X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtsepi32_epi8(__M, __A); } void test_mm256_mask_cvtsepi32_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.db.mem.256 + // X64-LABEL: test_mm256_mask_cvtsepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %__P, <8 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtsepi32_storeu_epi8(__P, __M, __A); } __m128i test_mm_cvtsepi32_epi16(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.128 + // X64-LABEL: test_mm_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %0, <8 x i16> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_cvtsepi32_epi16(__A); } __m128i test_mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.128 + // X64-LABEL: test_mm_mask_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %0, <8 x i16> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_cvtsepi32_epi16(__O, __M, __A); } __m128i test_mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.128 + // X64-LABEL: test_mm_maskz_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %0, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtsepi32_epi16(__M, __A); } void test_mm_mask_cvtsepi32_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.mem.128 + // X64-LABEL: test_mm_mask_cvtsepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %__P, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtsepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm256_cvtsepi32_epi16(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.256 + // X64-LABEL: test_mm256_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_cvtsepi32_epi16(__A); } __m128i test_mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.256 + // X64-LABEL: test_mm256_mask_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast 
<4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %0, <8 x i16> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtsepi32_epi16(__O, __M, __A); } __m128i test_mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtsepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.256 + // X64-LABEL: test_mm256_maskz_cvtsepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtsepi32_epi16(__M, __A); } void test_mm256_mask_cvtsepi32_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.dw.mem.256 + // X64-LABEL: test_mm256_mask_cvtsepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %__P, <8 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtsepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm_cvtsepi64_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.128 + // X64-LABEL: test_mm_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtsepi64_epi8(__A); } __m128i test_mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.128 + // X64-LABEL: test_mm_mask_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %__A, <16 x i8> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtsepi64_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.128 + // X64-LABEL: test_mm_maskz_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtsepi64_epi8(__M, __A); } void test_mm_mask_cvtsepi64_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi64_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.mem.128 + // X64-LABEL: test_mm_mask_cvtsepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtsepi64_storeu_epi8(__P, __M, __A); } __m128i test_mm256_cvtsepi64_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.256 + // X64-LABEL: test_mm256_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> 
@llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtsepi64_epi8(__A); } __m128i test_mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.256 + // X64-LABEL: test_mm256_mask_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %__A, <16 x i8> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtsepi64_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtsepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.256 + // X64-LABEL: test_mm256_maskz_cvtsepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtsepi64_epi8(__M, __A); } void test_mm256_mask_cvtsepi64_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi64_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.qb.mem.256 + // X64-LABEL: test_mm256_mask_cvtsepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtsepi64_storeu_epi8(__P, __M, __A); } __m128i test_mm_cvtsepi64_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.128 + // X64-LABEL: test_mm_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtsepi64_epi32(__A); } __m128i test_mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.128 + // X64-LABEL: test_mm_mask_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %__A, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtsepi64_epi32(__O, __M, __A); } __m128i test_mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.128 + // X64-LABEL: test_mm_maskz_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %__A, <4 x i32> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtsepi64_epi32(__M, __A); } void test_mm_mask_cvtsepi64_storeu_epi32(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.mem.128 + // X64-LABEL: test_mm_mask_cvtsepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + 
// X64-NEXT: ret void return _mm_mask_cvtsepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm256_cvtsepi64_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.256 + // X64-LABEL: test_mm256_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtsepi64_epi32(__A); } __m128i test_mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.256 + // X64-LABEL: test_mm256_mask_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %__A, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtsepi64_epi32(__O, __M, __A); } __m128i test_mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtsepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.256 + // X64-LABEL: test_mm256_maskz_cvtsepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %__A, <4 x i32> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtsepi64_epi32(__M, __A); } void test_mm256_mask_cvtsepi64_storeu_epi32(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovs.qd.mem.256 + // X64-LABEL: test_mm256_mask_cvtsepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtsepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm_cvtsepi64_epi16(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.128 + // X64-LABEL: test_mm_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %__A, <8 x i16> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtsepi64_epi16(__A); } __m128i test_mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.128 + // X64-LABEL: test_mm_mask_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %__A, <8 x i16> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtsepi64_epi16(__O, __M, __A); } __m128i test_mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.128 + // X64-LABEL: test_mm_maskz_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtsepi64_epi16(__M, __A); } void 
test_mm_mask_cvtsepi64_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.mem.128 + // X64-LABEL: test_mm_mask_cvtsepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtsepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm256_cvtsepi64_epi16(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.256 + // X64-LABEL: test_mm256_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %__A, <8 x i16> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtsepi64_epi16(__A); } __m128i test_mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.256 + // X64-LABEL: test_mm256_mask_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %__A, <8 x i16> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtsepi64_epi16(__O, __M, __A); } __m128i test_mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtsepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.256 + // X64-LABEL: test_mm256_maskz_cvtsepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtsepi64_epi16(__M, __A); } void test_mm256_mask_cvtsepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovs.qw.mem.256 + // X64-LABEL: test_mm256_mask_cvtsepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtsepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm_cvtusepi32_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.128 + // X64-LABEL: test_mm_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %0, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_cvtusepi32_epi8(__A); } __m128i test_mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.128 + // X64-LABEL: test_mm_mask_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %0, <16 x i8> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_cvtusepi32_epi8(__O, __M, __A); } __m128i 
test_mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.128 + // X64-LABEL: test_mm_maskz_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %0, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtusepi32_epi8(__M, __A); } void test_mm_mask_cvtusepi32_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.mem.128 + // X64-LABEL: test_mm_mask_cvtusepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %__P, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtusepi32_storeu_epi8(__P, __M, __A); } __m128i test_mm256_cvtusepi32_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.256 + // X64-LABEL: test_mm256_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %0, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_cvtusepi32_epi8(__A); } __m128i test_mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.256 + // X64-LABEL: test_mm256_mask_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %0, <16 x i8> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtusepi32_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtusepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.256 + // X64-LABEL: test_mm256_maskz_cvtusepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %0, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtusepi32_epi8(__M, __A); } void test_mm256_mask_cvtusepi32_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.db.mem.256 + // X64-LABEL: test_mm256_mask_cvtusepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %__P, <8 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtusepi32_storeu_epi8(__P, __M, __A); } __m128i test_mm_cvtusepi32_epi16(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.128 + // X64-LABEL: test_mm_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <8 x i16> 
@llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %0, <8 x i16> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_cvtusepi32_epi16(__A); } __m128i test_mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.128 + // X64-LABEL: test_mm_mask_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %0, <8 x i16> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_cvtusepi32_epi16(__O, __M, __A); } __m128i test_mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.128 + // X64-LABEL: test_mm_maskz_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %0, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtusepi32_epi16(__M, __A); } void test_mm_mask_cvtusepi32_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.mem.128 + // X64-LABEL: test_mm_mask_cvtusepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %__P, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtusepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm256_cvtusepi32_epi16(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.256 + // X64-LABEL: test_mm256_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 -1) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_cvtusepi32_epi16(__A); } __m128i test_mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.256 + // X64-LABEL: test_mm256_mask_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %0, <8 x i16> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtusepi32_epi16(__O, __M, __A); } __m128i test_mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtusepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.256 + // X64-LABEL: test_mm256_maskz_cvtusepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtusepi32_epi16(__M, 
__A); } void test_mm256_mask_cvtusepi32_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.dw.mem.256 + // X64-LABEL: test_mm256_mask_cvtusepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %__P, <8 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtusepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm_cvtusepi64_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.128 + // X64-LABEL: test_mm_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtusepi64_epi8(__A); } __m128i test_mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.128 + // X64-LABEL: test_mm_mask_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %__A, <16 x i8> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtusepi64_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.128 + // X64-LABEL: test_mm_maskz_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtusepi64_epi8(__M, __A); } void test_mm_mask_cvtusepi64_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi64_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.mem.128 + // X64-LABEL: test_mm_mask_cvtusepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtusepi64_storeu_epi8(__P, __M, __A); } __m128i test_mm256_cvtusepi64_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.256 + // X64-LABEL: test_mm256_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %__A, <16 x i8> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtusepi64_epi8(__A); } __m128i test_mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.256 + // X64-LABEL: test_mm256_mask_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %__A, <16 x i8> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtusepi64_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A) { - // 
CHECK-LABEL: @test_mm256_maskz_cvtusepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.256 + // X64-LABEL: test_mm256_maskz_cvtusepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtusepi64_epi8(__M, __A); } void test_mm256_mask_cvtusepi64_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi64_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.qb.mem.256 + // X64-LABEL: test_mm256_mask_cvtusepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtusepi64_storeu_epi8(__P, __M, __A); } __m128i test_mm_cvtusepi64_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.128 + // X64-LABEL: test_mm_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtusepi64_epi32(__A); } __m128i test_mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.128 + // X64-LABEL: test_mm_mask_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %__A, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtusepi64_epi32(__O, __M, __A); } __m128i test_mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.128 + // X64-LABEL: test_mm_maskz_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %__A, <4 x i32> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtusepi64_epi32(__M, __A); } void test_mm_mask_cvtusepi64_storeu_epi32(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.mem.128 + // X64-LABEL: test_mm_mask_cvtusepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtusepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm256_cvtusepi64_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.256 + // X64-LABEL: test_mm256_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %__A, <4 x i32> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtusepi64_epi32(__A); } __m128i test_mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.256 + // X64-LABEL: test_mm256_mask_cvtusepi64_epi32 + // X64: entry: + // 
X64-NEXT: %0 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %__A, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtusepi64_epi32(__O, __M, __A); } __m128i test_mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtusepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.256 + // X64-LABEL: test_mm256_maskz_cvtusepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %__A, <4 x i32> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtusepi64_epi32(__M, __A); } void test_mm256_mask_cvtusepi64_storeu_epi32(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmovus.qd.mem.256 + // X64-LABEL: test_mm256_mask_cvtusepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtusepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm_cvtusepi64_epi16(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.128 + // X64-LABEL: test_mm_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %__A, <8 x i16> zeroinitializer, i8 -1) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtusepi64_epi16(__A); } __m128i test_mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.128 + // X64-LABEL: test_mm_mask_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %__A, <8 x i16> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtusepi64_epi16(__O, __M, __A); } __m128i test_mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.128 + // X64-LABEL: test_mm_maskz_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtusepi64_epi16(__M, __A); } void test_mm_mask_cvtusepi64_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.mem.128 + // X64-LABEL: test_mm_mask_cvtusepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtusepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm256_cvtusepi64_epi16(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.256 + // X64-LABEL: test_mm256_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %__A, <8 x i16> zeroinitializer, i8 
-1) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtusepi64_epi16(__A); } __m128i test_mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.256 + // X64-LABEL: test_mm256_mask_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %__A, <8 x i16> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtusepi64_epi16(__O, __M, __A); } __m128i test_mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtusepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.256 + // X64-LABEL: test_mm256_maskz_cvtusepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtusepi64_epi16(__M, __A); } void test_mm256_mask_cvtusepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmovus.qw.mem.256 + // X64-LABEL: test_mm256_mask_cvtusepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtusepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm_cvtepi32_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepi32_epi8 - // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8> - // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> + // X64-LABEL: test_mm_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i = trunc <4 x i32> %0 to <4 x i8> + // X64-NEXT: %shuffle.i = shufflevector <4 x i8> %conv.i, <4 x i8> zeroinitializer, <16 x i32> + // X64-NEXT: %1 = bitcast <16 x i8> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtepi32_epi8(__A); } __m128i test_mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + // X64-LABEL: test_mm_mask_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %0, <16 x i8> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_cvtepi32_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + // X64-LABEL: test_mm_maskz_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %0, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepi32_epi8(__M, __A); } void test_mm_mask_cvtepi32_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi32_storeu_epi8 - // CHECK: 
@llvm.x86.avx512.mask.pmov.db.mem.128 + // X64-LABEL: test_mm_mask_cvtepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %__P, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtepi32_storeu_epi8(__P, __M, __A); } __m128i test_mm256_cvtepi32_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtepi32_epi8 - // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8> - // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> + // X64-LABEL: test_mm256_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i = trunc <8 x i32> %0 to <8 x i8> + // X64-NEXT: %shuffle.i = shufflevector <8 x i8> %conv.i, <8 x i8> zeroinitializer, <16 x i32> + // X64-NEXT: %1 = bitcast <16 x i8> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtepi32_epi8(__A); } __m128i test_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.256 + // X64-LABEL: test_mm256_mask_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %0, <16 x i8> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtepi32_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.256 + // X64-LABEL: test_mm256_maskz_cvtepi32_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %0, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtepi32_epi8(__M, __A); } void test_mm256_mask_cvtepi32_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi32_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.mem.256 + // X64-LABEL: test_mm256_mask_cvtepi32_storeu_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %__P, <8 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtepi32_storeu_epi8(__P, __M, __A); } __m128i test_mm_cvtepi32_epi16(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepi32_epi16 - // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16> - // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %conv.i = trunc <4 x i32> %0 to <4 x i16> + // X64-NEXT: %shuffle.i = shufflevector <4 x i16> %conv.i, <4 x i16> zeroinitializer, <8 x i32> + // X64-NEXT: %1 = bitcast <8 x i16> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_cvtepi32_epi16(__A); } __m128i test_mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + // X64-LABEL: test_mm_mask_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // 
X64-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %0, <8 x i16> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mask_cvtepi32_epi16(__O, __M, __A); } __m128i test_mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + // X64-LABEL: test_mm_maskz_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %0, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepi32_epi16(__M, __A); } void test_mm_mask_cvtepi32_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.mem.128 + // X64-LABEL: test_mm_mask_cvtepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %__P, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm256_cvtepi32_epi16(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtepi32_epi16 - // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16> + // X64-LABEL: test_mm256_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %conv.i = trunc <8 x i32> %0 to <8 x i16> + // X64-NEXT: %1 = bitcast <8 x i16> %conv.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_cvtepi32_epi16(__A); } __m128i test_mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.256 + // X64-LABEL: test_mm256_mask_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> %1, i8 %__M) #9 + // X64-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtepi32_epi16(__O, __M, __A); } __m128i test_mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.256 + // X64-LABEL: test_mm256_maskz_cvtepi32_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtepi32_epi16(__M, __A); } void test_mm256_mask_cvtepi32_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi32_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.mem.256 + // X64-LABEL: test_mm256_mask_cvtepi32_storeu_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %__P, <8 x i32> %0, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm_cvtepi64_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepi64_epi8 - // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8> - // CHECK: shufflevector <2 x i8> %{{.*}}, <2 
x i8> %{{.*}}, <16 x i32> + // X64-LABEL: test_mm_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %conv.i = trunc <2 x i64> %__A to <2 x i8> + // X64-NEXT: %shuffle.i = shufflevector <2 x i8> %conv.i, <2 x i8> zeroinitializer, <16 x i32> + // X64-NEXT: %0 = bitcast <16 x i8> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %0 return _mm_cvtepi64_epi8(__A); } __m128i test_mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + // X64-LABEL: test_mm_mask_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %__A, <16 x i8> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepi64_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + // X64-LABEL: test_mm_maskz_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtepi64_epi8(__M, __A); } void test_mm_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi64_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.mem.128 + // X64-LABEL: test_mm_mask_cvtepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtepi64_storeu_epi8(__P, __M, __A); } __m128i test_mm256_cvtepi64_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtepi64_epi8 - // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8> - // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> + // X64-LABEL: test_mm256_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %conv.i = trunc <4 x i64> %__A to <4 x i8> + // X64-NEXT: %shuffle.i = shufflevector <4 x i8> %conv.i, <4 x i8> zeroinitializer, <16 x i32> + // X64-NEXT: %0 = bitcast <16 x i8> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %0 return _mm256_cvtepi64_epi8(__A); } __m128i test_mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + // X64-LABEL: test_mm256_mask_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // X64-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %__A, <16 x i8> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtepi64_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + // X64-LABEL: test_mm256_maskz_cvtepi64_epi8 + // X64: entry: + // X64-NEXT: %0 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %__A, <16 x i8> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <16 x i8> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtepi64_epi8(__M, __A); } void test_mm256_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi64_storeu_epi8 - // CHECK: 
@llvm.x86.avx512.mask.pmov.qb.mem.256 + // X64-LABEL: test_mm256_mask_cvtepi64_storeu_epi8 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtepi64_storeu_epi8(__P, __M, __A); } __m128i test_mm_cvtepi64_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepi64_epi32 - // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32> - // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32> + // X64-LABEL: test_mm_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %conv.i = trunc <2 x i64> %__A to <2 x i32> + // X64-NEXT: %shuffle.i = shufflevector <2 x i32> %conv.i, <2 x i32> zeroinitializer, <4 x i32> + // X64-NEXT: %0 = bitcast <4 x i32> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %0 return _mm_cvtepi64_epi32(__A); } __m128i test_mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + // X64-LABEL: test_mm_mask_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %1 = tail call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %__A, <4 x i32> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepi64_epi32(__O, __M, __A); } __m128i test_mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + // X64-LABEL: test_mm_maskz_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %__A, <4 x i32> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <4 x i32> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtepi64_epi32(__M, __A); } void test_mm_mask_cvtepi64_storeu_epi32(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.mem.128 + // X64-LABEL: test_mm_mask_cvtepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm256_cvtepi64_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtepi64_epi32 - // CHECK: trunc <4 x i64> %{{.*}} to <4 x i32> + // X64-LABEL: test_mm256_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %conv.i = trunc <4 x i64> %__A to <4 x i32> + // X64-NEXT: %0 = bitcast <4 x i32> %conv.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %0 return _mm256_cvtepi64_epi32(__A); } __m128i test_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi64_epi32 - // CHECK: trunc <4 x i64> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %conv.i.i = trunc <4 x i64> %__A to <4 x i32> + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %0 + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtepi64_epi32(__O, __M, __A); } __m128i test_mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) { - // 
CHECK-LABEL: @test_mm256_maskz_cvtepi64_epi32 - // CHECK: trunc <4 x i64> %{{.*}} to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_cvtepi64_epi32 + // X64: entry: + // X64-NEXT: %conv.i.i = trunc <4 x i64> %__A to <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer + // X64-NEXT: %2 = bitcast <4 x i32> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtepi64_epi32(__M, __A); } void test_mm256_mask_cvtepi64_storeu_epi32(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi64_storeu_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.mem.256 + // X64-LABEL: test_mm256_mask_cvtepi64_storeu_epi32 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm_cvtepi64_epi16(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepi64_epi16 - // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16> - // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %conv.i = trunc <2 x i64> %__A to <2 x i16> + // X64-NEXT: %shuffle.i = shufflevector <2 x i16> %conv.i, <2 x i16> zeroinitializer, <8 x i32> + // X64-NEXT: %0 = bitcast <8 x i16> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %0 return _mm_cvtepi64_epi16(__A); } __m128i test_mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + // X64-LABEL: test_mm_mask_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %__A, <8 x i16> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtepi64_epi16(__O, __M, __A); } __m128i test_mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + // X64-LABEL: test_mm_maskz_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtepi64_epi16(__M, __A); } void test_mm_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.mem.128 + // X64-LABEL: test_mm_mask_cvtepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %__P, <2 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm_mask_cvtepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm256_cvtepi64_epi16(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtepi64_epi16 - // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16> - // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %conv.i = trunc <4 x i64> %__A to <4 x i16> + // X64-NEXT: %shuffle.i = shufflevector <4 x i16> %conv.i, <4 x i16> zeroinitializer, <8 x i32> + // 
X64-NEXT: %0 = bitcast <8 x i16> %shuffle.i to <2 x i64> + // X64-NEXT: ret <2 x i64> %0 return _mm256_cvtepi64_epi16(__A); } __m128i test_mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + // X64-LABEL: test_mm256_mask_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %__A, <8 x i16> %0, i8 %__M) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtepi64_epi16(__O, __M, __A); } __m128i test_mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + // X64-LABEL: test_mm256_maskz_cvtepi64_epi16 + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtepi64_epi16(__M, __A); } void test_mm256_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi64_storeu_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.mem.256 + // X64-LABEL: test_mm256_mask_cvtepi64_storeu_epi16 + // X64: entry: + // X64-NEXT: tail call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %__P, <4 x i64> %__A, i8 %__M) #9 + // X64-NEXT: ret void return _mm256_mask_cvtepi64_storeu_epi16(__P, __M, __A); } __m128 test_mm256_extractf32x4_ps(__m256 __A) { - // CHECK-LABEL: @test_mm256_extractf32x4_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> + // X64-LABEL: test_mm256_extractf32x4_ps + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> + // X64-NEXT: ret <4 x float> %extract return _mm256_extractf32x4_ps(__A, 1); } __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_extractf32x4_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_extractf32x4_ps + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x float> %extract, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1); } __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_extractf32x4_ps + // X64: entry: + // X64-NEXT: %extract = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract1, <4 x float> %extract, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm256_maskz_extractf32x4_ps(__U, __A, 1); } __m128i test_mm256_extracti32x4_epi32(__m256i __A) { - 
// CHECK-LABEL: @test_mm256_extracti32x4_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> + // X64-LABEL: test_mm256_extracti32x4_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %extract = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + // X64-NEXT: %1 = bitcast <4 x i32> %extract to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_extracti32x4_epi32(__A, 1); } __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_extracti32x4_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %extract = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract1, <4 x i32> %extract, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1); } __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_extracti32x4_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %extract = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract1 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract1, <4 x i32> %extract, <4 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_maskz_extracti32x4_epi32(__U, __A, 1); } __m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) { - // CHECK-LABEL: @test_mm256_insertf32x4 - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_insertf32x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x float> %__A, <8 x float> %widen, <8 x i32> + // X64-NEXT: ret <8 x float> %insert return _mm256_insertf32x4(__A, __B, 1); } __m256 test_mm256_mask_insertf32x4(__m256 __W, __mmask8 __U, __m256 __A, __m128 __B) { - // CHECK-LABEL: @test_mm256_mask_insertf32x4 - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_insertf32x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x float> %__A, <8 x float> %widen, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %insert, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_insertf32x4(__W, __U, __A, __B, 1); } __m256 test_mm256_maskz_insertf32x4(__mmask8 __U, __m256 __A, __m128 __B) { - // CHECK-LABEL: @test_mm256_maskz_insertf32x4 - // 
CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_insertf32x4 + // X64: entry: + // X64-NEXT: %widen = shufflevector <4 x float> %__B, <4 x float> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x float> %__A, <8 x float> %widen, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %insert, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_insertf32x4(__U, __A, __B, 1); } __m256i test_mm256_inserti32x4(__m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_inserti32x4 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_inserti32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x i32> %0, <8 x i32> %widen, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i32> %insert to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_inserti32x4(__A, __B, 1); } __m256i test_mm256_mask_inserti32x4(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_inserti32x4 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_inserti32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x i32> %0, <8 x i32> %widen, <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %insert, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_inserti32x4(__W, __U, __A, __B, 1); } __m256i test_mm256_maskz_inserti32x4(__mmask8 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_inserti32x4 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_inserti32x4 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %widen = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> + // X64-NEXT: %insert = shufflevector <8 x i32> %0, <8 x i32> %widen, <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %insert, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_inserti32x4(__U, __A, __B, 1); } __m128d test_mm_getmant_pd(__m128d __A) { - // CHECK-LABEL: @test_mm_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.128 + // X64-LABEL: test_mm_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %__A, i32 9, <2 x double> zeroinitializer, i8 -1) + // X64-NEXT: ret <2 x double> %0 return _mm_getmant_pd(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128d test_mm_mask_getmant_pd(__m128d __W, __mmask8 __U, 
__m128d __A) { - // CHECK-LABEL: @test_mm_mask_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.128 + // X64-LABEL: test_mm_mask_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %__A, i32 9, <2 x double> %__W, i8 %__U) + // X64-NEXT: ret <2 x double> %0 return _mm_mask_getmant_pd(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128d test_mm_maskz_getmant_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.128 + // X64-LABEL: test_mm_maskz_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %__A, i32 9, <2 x double> zeroinitializer, i8 %__U) + // X64-NEXT: ret <2 x double> %0 return _mm_maskz_getmant_pd(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m256d test_mm256_getmant_pd(__m256d __A) { - // CHECK-LABEL: @test_mm256_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.256 + // X64-LABEL: test_mm256_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %__A, i32 9, <4 x double> zeroinitializer, i8 -1) + // X64-NEXT: ret <4 x double> %0 return _mm256_getmant_pd(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m256d test_mm256_mask_getmant_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.256 + // X64-LABEL: test_mm256_mask_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %__A, i32 9, <4 x double> %__W, i8 %__U) + // X64-NEXT: ret <4 x double> %0 return _mm256_mask_getmant_pd(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m256d test_mm256_maskz_getmant_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_getmant_pd - // CHECK: @llvm.x86.avx512.mask.getmant.pd.256 + // X64-LABEL: test_mm256_maskz_getmant_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %__A, i32 9, <4 x double> zeroinitializer, i8 %__U) + // X64-NEXT: ret <4 x double> %0 return _mm256_maskz_getmant_pd(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128 test_mm_getmant_ps(__m128 __A) { - // CHECK-LABEL: @test_mm_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.128 + // X64-LABEL: test_mm_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %__A, i32 9, <4 x float> zeroinitializer, i8 -1) + // X64-NEXT: ret <4 x float> %0 return _mm_getmant_ps(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128 test_mm_mask_getmant_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.128 + // X64-LABEL: test_mm_mask_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %__A, i32 9, <4 x float> %__W, i8 %__U) + // X64-NEXT: ret <4 x float> %0 return _mm_mask_getmant_ps(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128 test_mm_maskz_getmant_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.128 + // X64-LABEL: test_mm_maskz_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %__A, i32 9, <4 x float> zeroinitializer, i8 %__U) + // X64-NEXT: 
ret <4 x float> %0 return _mm_maskz_getmant_ps(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m256 test_mm256_getmant_ps(__m256 __A) { - // CHECK-LABEL: @test_mm256_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.256 + // X64-LABEL: test_mm256_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %__A, i32 9, <8 x float> zeroinitializer, i8 -1) + // X64-NEXT: ret <8 x float> %0 return _mm256_getmant_ps(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m256 test_mm256_mask_getmant_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.256 + // X64-LABEL: test_mm256_mask_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %__A, i32 9, <8 x float> %__W, i8 %__U) + // X64-NEXT: ret <8 x float> %0 return _mm256_mask_getmant_ps(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m256 test_mm256_maskz_getmant_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_getmant_ps - // CHECK: @llvm.x86.avx512.mask.getmant.ps.256 + // X64-LABEL: test_mm256_maskz_getmant_ps + // X64: entry: + // X64-NEXT: %0 = tail call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %__A, i32 9, <8 x float> zeroinitializer, i8 %__U) + // X64-NEXT: ret <8 x float> %0 return _mm256_maskz_getmant_ps(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mmask_i64gather_pd - // CHECK: @llvm.x86.avx512.mask.gather3div2.df + // X64-LABEL: test_mm_mmask_i64gather_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x double> @llvm.x86.avx512.mask.gather3div2.df(<2 x double> %__v1_old, i8* %__addr, <2 x i64> %__index, <2 x i1> %extract, i32 2) + // X64-NEXT: ret <2 x double> %1 return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mmask_i64gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather3div2.di + // X64-LABEL: test_mm_mmask_i64gather_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <2 x i64> @llvm.x86.avx512.mask.gather3div2.di(<2 x i64> %__v1_old, i8* %__addr, <2 x i64> %__index, <2 x i1> %extract, i32 2) + // X64-NEXT: ret <2 x i64> %1 return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mmask_i64gather_pd - // CHECK: @llvm.x86.avx512.mask.gather3div4.df + // X64-LABEL: test_mm256_mmask_i64gather_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x double> @llvm.x86.avx512.mask.gather3div4.df(<4 x double> %__v1_old, i8* %__addr, <4 x i64> %__index, <4 x i1> %extract, i32 2) + // X64-NEXT: ret <4 x double> %1 return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, 
__mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mmask_i64gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather3div4.di + // X64-LABEL: test_mm256_mmask_i64gather_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx512.mask.gather3div4.di(<4 x i64> %__v1_old, i8* %__addr, <4 x i64> %__index, <4 x i1> %extract, i32 2) + // X64-NEXT: ret <4 x i64> %1 return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mmask_i64gather_ps - // CHECK: @llvm.x86.avx512.mask.gather3div4.sf + // X64-LABEL: test_mm_mmask_i64gather_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.gather3div4.sf(<4 x float> %__v1_old, i8* %__addr, <2 x i64> %__index, <2 x i1> %extract, i32 2) + // X64-NEXT: ret <4 x float> %1 return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mmask_i64gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather3div4.si + // X64-LABEL: test_mm_mmask_i64gather_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__v1_old to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx512.mask.gather3div4.si(<4 x i32> %0, i8* %__addr, <2 x i64> %__index, <2 x i1> %extract, i32 2) + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mmask_i64gather_ps - // CHECK: @llvm.x86.avx512.mask.gather3div8.sf + // X64-LABEL: test_mm256_mmask_i64gather_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.gather3div8.sf(<4 x float> %__v1_old, i8* %__addr, <4 x i64> %__index, <4 x i1> %extract, i32 2) + // X64-NEXT: ret <4 x float> %1 return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mmask_i64gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather3div8.si + // X64-LABEL: test_mm256_mmask_i64gather_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__v1_old to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i32> @llvm.x86.avx512.mask.gather3div8.si(<4 x i32> %0, i8* %__addr, <4 x i64> %__index, <4 x i1> %extract, i32 2) + // X64-NEXT: %3 = bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m128d test_mm_mask_i32gather_pd(__m128d 
__v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mask_i32gather_pd - // CHECK: @llvm.x86.avx512.mask.gather3siv2.df + // X64-LABEL: test_mm_mask_i32gather_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x double> @llvm.x86.avx512.mask.gather3siv2.df(<2 x double> %__v1_old, i8* %__addr, <4 x i32> %0, <2 x i1> %extract, i32 2) + // X64-NEXT: ret <2 x double> %2 return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mask_i32gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather3siv2.di + // X64-LABEL: test_mm_mask_i32gather_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = tail call <2 x i64> @llvm.x86.avx512.mask.gather3siv2.di(<2 x i64> %__v1_old, i8* %__addr, <4 x i32> %0, <2 x i1> %extract, i32 2) + // X64-NEXT: ret <2 x i64> %2 return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mask_i32gather_pd - // CHECK: @llvm.x86.avx512.mask.gather3siv4.df + // X64-LABEL: test_mm256_mask_i32gather_pd + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x double> @llvm.x86.avx512.mask.gather3siv4.df(<4 x double> %__v1_old, i8* %__addr, <4 x i32> %0, <4 x i1> %extract, i32 2) + // X64-NEXT: ret <4 x double> %2 return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mask_i32gather_epi64 - // CHECK: @llvm.x86.avx512.mask.gather3siv4.di + // X64-LABEL: test_mm256_mask_i32gather_epi64 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x i64> @llvm.x86.avx512.mask.gather3siv4.di(<4 x i64> %__v1_old, i8* %__addr, <4 x i32> %0, <4 x i1> %extract, i32 2) + // X64-NEXT: ret <4 x i64> %2 return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mask_i32gather_ps - // CHECK: @llvm.x86.avx512.mask.gather3siv4.sf + // X64-LABEL: test_mm_mask_i32gather_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = tail call <4 x float> @llvm.x86.avx512.mask.gather3siv4.sf(<4 x float> %__v1_old, i8* %__addr, <4 x i32> %0, <4 x i1> %extract, i32 2) + // X64-NEXT: ret <4 x float> %2 return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); } 
__m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { - // CHECK-LABEL: @test_mm_mask_i32gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather3siv4.si + // X64-LABEL: test_mm_mask_i32gather_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__v1_old to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__index to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = tail call <4 x i32> @llvm.x86.avx512.mask.gather3siv4.si(<4 x i32> %0, i8* %__addr, <4 x i32> %1, <4 x i1> %extract, i32 2) + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mask_i32gather_ps - // CHECK: @llvm.x86.avx512.mask.gather3siv8.sf + // X64-LABEL: test_mm256_mask_i32gather_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %2 = tail call <8 x float> @llvm.x86.avx512.mask.gather3siv8.sf(<8 x float> %__v1_old, i8* %__addr, <8 x i32> %0, <8 x i1> %1, i32 2) + // X64-NEXT: ret <8 x float> %2 return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm256_mask_i32gather_epi32 - // CHECK: @llvm.x86.avx512.mask.gather3siv8.si + // X64-LABEL: test_mm256_mask_i32gather_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__v1_old to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__index to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__mask to <8 x i1> + // X64-NEXT: %3 = tail call <8 x i32> @llvm.x86.avx512.mask.gather3siv8.si(<8 x i32> %0, i8* %__addr, <8 x i32> %1, <8 x i1> %2, i32 2) + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m256d test_mm256_permutex_pd(__m256d __X) { - // CHECK-LABEL: @test_mm256_permutex_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> + // X64-LABEL: test_mm256_permutex_pd + // X64: entry: + // X64-NEXT: %perm = shufflevector <4 x double> %__X, <4 x double> undef, <4 x i32> + // X64-NEXT: ret <4 x double> %perm return _mm256_permutex_pd(__X, 3); } __m256d test_mm256_mask_permutex_pd(__m256d __W, __mmask8 __U, __m256d __X) { - // CHECK-LABEL: @test_mm256_mask_permutex_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_permutex_pd + // X64: entry: + // X64-NEXT: %perm = shufflevector <4 x double> %__X, <4 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %perm, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_permutex_pd(__W, __U, __X, 1); } __m256d test_mm256_maskz_permutex_pd(__mmask8 __U, __m256d __X) { - // CHECK-LABEL: @test_mm256_maskz_permutex_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, 
<4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutex_pd + // X64: entry: + // X64-NEXT: %perm = shufflevector <4 x double> %__X, <4 x double> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x double> %perm, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_permutex_pd(__U, __X, 1); } __m256i test_mm256_permutex_epi64(__m256i __X) { - // CHECK-LABEL: @test_mm256_permutex_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> + // X64-LABEL: test_mm256_permutex_epi64 + // X64: entry: + // X64-NEXT: %perm = shufflevector <4 x i64> %__X, <4 x i64> undef, <4 x i32> + // X64-NEXT: ret <4 x i64> %perm return _mm256_permutex_epi64(__X, 3); } __m256i test_mm256_mask_permutex_epi64(__m256i __W, __mmask8 __M, __m256i __X) { - // CHECK-LABEL: @test_mm256_mask_permutex_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_permutex_epi64 + // X64: entry: + // X64-NEXT: %perm = shufflevector <4 x i64> %__X, <4 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x i64> %perm, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_permutex_epi64(__W, __M, __X, 3); } __m256i test_mm256_maskz_permutex_epi64(__mmask8 __M, __m256i __X) { - // CHECK-LABEL: @test_mm256_maskz_permutex_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutex_epi64 + // X64: entry: + // X64-NEXT: %perm = shufflevector <4 x i64> %__X, <4 x i64> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x i64> %perm, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_permutex_epi64(__M, __X, 3); } __m256d test_mm256_permutexvar_pd(__m256i __X, __m256d __Y) { - // CHECK-LABEL: @test_mm256_permutexvar_pd - // CHECK: @llvm.x86.avx512.permvar.df.256 + // X64-LABEL: test_mm256_permutexvar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %__Y, <4 x i64> %__X) #9 + // X64-NEXT: ret <4 x double> %0 return _mm256_permutexvar_pd(__X, __Y); } __m256d test_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { - // CHECK-LABEL: @test_mm256_mask_permutexvar_pd - // CHECK: @llvm.x86.avx512.permvar.df.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_permutexvar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %__Y, <4 x i64> %__X) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W + // X64-NEXT: ret <4 x double> %2 return _mm256_mask_permutexvar_pd(__W, __U, __X, __Y); } __m256d test_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) { - // CHECK-LABEL: 
@test_mm256_maskz_permutexvar_pd - // CHECK: @llvm.x86.avx512.permvar.df.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutexvar_pd + // X64: entry: + // X64-NEXT: %0 = tail call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %__Y, <4 x i64> %__X) #9 + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %2 return _mm256_maskz_permutexvar_pd(__U, __X, __Y); } __m256i test_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi64 - // CHECK: @llvm.x86.avx512.permvar.di.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutexvar_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %__Y, <4 x i64> %__X) #9 + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer + // X64-NEXT: ret <4 x i64> %2 return _mm256_maskz_permutexvar_epi64(__M, __X, __Y); } __m256i test_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_permutexvar_epi64 - // CHECK: @llvm.x86.avx512.permvar.di.256 - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_permutexvar_epi64 + // X64: entry: + // X64-NEXT: %0 = tail call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %__Y, <4 x i64> %__X) #9 + // X64-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %2 return _mm256_mask_permutexvar_epi64(__W, __M, __X, __Y); } __m256 test_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { - // CHECK-LABEL: @test_mm256_mask_permutexvar_ps - // CHECK: @llvm.x86.avx2.permps - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_permutexvar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %__Y, <8 x i32> %0) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %__W + // X64-NEXT: ret <8 x float> %3 return _mm256_mask_permutexvar_ps(__W, __U, __X, __Y); } __m256 test_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { - // CHECK-LABEL: @test_mm256_maskz_permutexvar_ps - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutexvar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %__Y, <8 x i32> %0) #9 + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %3 return _mm256_maskz_permutexvar_ps(__U, __X, __Y); } __m256 test_mm256_permutexvar_ps(__m256i __X, __m256 __Y) { - // CHECK-LABEL: 
@test_mm256_permutexvar_ps - // CHECK: @llvm.x86.avx2.permps + // X64-LABEL: test_mm256_permutexvar_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %__Y, <8 x i32> %0) #9 + // X64-NEXT: ret <8 x float> %1 return _mm256_permutexvar_ps( __X, __Y); } __m256i test_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi32 - // CHECK: @llvm.x86.avx2.permd - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_permutexvar_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_maskz_permutexvar_epi32(__M, __X, __Y); } __m256i test_mm256_permutexvar_epi32(__m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_permutexvar_epi32 - // CHECK: @llvm.x86.avx2.permd + // X64-LABEL: test_mm256_permutexvar_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_permutexvar_epi32(__X, __Y); } __m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_permutexvar_epi32 - // CHECK: @llvm.x86.avx2.permd - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_permutexvar_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__Y to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__X to <8 x i32> + // X64-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %0, <8 x i32> %1) #9 + // X64-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // X64-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // X64-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // X64-NEXT: ret <4 x i64> %6 return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y); } __m128i test_mm_alignr_epi32(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_alignr_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> + // X64-LABEL: test_mm_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %valign = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> + // X64-NEXT: %2 = bitcast <4 x i32> %valign to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_alignr_epi32(__A, __B, 1); } __m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_alignr_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %valign = 
shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> + // X64-NEXT: %2 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = select <4 x i1> %extract, <4 x i32> %valign, <4 x i32> %2 + // X64-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // X64-NEXT: ret <2 x i64> %5 return _mm_mask_alignr_epi32(__W, __U, __A, __B, 1); } __m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_alignr_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // X64-NEXT: %valign = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract, <4 x i32> %valign, <4 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_maskz_alignr_epi32(__U, __A, __B, 1); } __m256i test_mm256_alignr_epi32(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_alignr_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // X64-LABEL: test_mm256_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %valign = shufflevector <8 x i32> %1, <8 x i32> %0, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i32> %valign to <4 x i64> + // X64-NEXT: ret <4 x i64> %2 return _mm256_alignr_epi32(__A, __B, 1); } __m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_alignr_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %valign = shufflevector <8 x i32> %1, <8 x i32> %0, <8 x i32> + // X64-NEXT: %2 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %4 = select <8 x i1> %3, <8 x i32> %valign, <8 x i32> %2 + // X64-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // X64-NEXT: ret <4 x i64> %5 return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1); } __m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_alignr_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_alignr_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // X64-NEXT: %valign = shufflevector <8 x i32> %1, <8 x i32> %0, <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %valign, <8 x i32> zeroinitializer + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_maskz_alignr_epi32(__U, __A, __B, 1); } __m128i 
test_mm_alignr_epi64(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_alignr_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> + // X64-LABEL: test_mm_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <2 x i64> %__B, <2 x i64> %__A, <2 x i32> + // X64-NEXT: ret <2 x i64> %valign return _mm_alignr_epi64(__A, __B, 1); } __m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_alignr_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_mask_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <2 x i64> %__B, <2 x i64> %__A, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract, <2 x i64> %valign, <2 x i64> %__W + // X64-NEXT: ret <2 x i64> %1 return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1); } __m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_alignr_epi64 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + // X64-LABEL: test_mm_maskz_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <2 x i64> %__B, <2 x i64> %__A, <2 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract, <2 x i64> %valign, <2 x i64> zeroinitializer + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_alignr_epi64(__U, __A, __B, 1); } __m256i test_mm256_alignr_epi64(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_alignr_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> + // X64-LABEL: test_mm256_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <4 x i64> %__B, <4 x i64> %__A, <4 x i32> + // X64-NEXT: ret <4 x i64> %valign return _mm256_alignr_epi64(__A, __B, 1); } __m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_alignr_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_mask_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <4 x i64> %__B, <4 x i64> %__A, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x i64> %valign, <4 x i64> %__W + // X64-NEXT: ret <4 x i64> %1 return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1); } __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_alignr_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + // X64-LABEL: test_mm256_maskz_alignr_epi64 + // X64: entry: + // X64-NEXT: %valign = shufflevector <4 x i64> %__B, <4 x i64> %__A, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract, <4 x i64> %valign, <4 x i64> 
zeroinitializer + // X64-NEXT: ret <4 x i64> %1 return _mm256_maskz_alignr_epi64(__U, __A, __B, 1); } __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_movehdup_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_movehdup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_movehdup_ps(__W, __U, __A); } __m128 test_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_movehdup_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_movehdup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_movehdup_ps(__U, __A); } __m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_movehdup_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_movehdup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_movehdup_ps(__W, __U, __A); } __m256 test_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_movehdup_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_movehdup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_movehdup_ps(__U, __A); } __m128 test_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_moveldup_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_moveldup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_moveldup_ps(__W, __U, __A); } __m128 
test_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_moveldup_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> - // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_moveldup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <4 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %shuffle.i.i, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_moveldup_ps(__U, __A); } __m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_moveldup_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_moveldup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_moveldup_ps(__W, __U, __A); } __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_moveldup_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_moveldup_ps + // X64: entry: + // X64-NEXT: %shuffle.i.i = shufflevector <8 x float> %__A, <8 x float> undef, <8 x i32> + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %shuffle.i.i, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_moveldup_ps(__U, __A); } __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_shuffle_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_mask_shuffle_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %permil = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__W to <4 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + // X64-NEXT: %3 = select <4 x i1> %extract, <4 x i32> %permil, <4 x i32> %1 + // X64-NEXT: %4 = bitcast <4 x i32> %3 to <2 x i64> + // X64-NEXT: ret <2 x i64> %4 return _mm_mask_shuffle_epi32(__W, __U, __A, 1); } __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_shuffle_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // X64-LABEL: test_mm_maskz_shuffle_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // X64-NEXT: %permil = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = select <4 x i1> %extract, <4 x i32> %permil, <4 x i32> zeroinitializer + // X64-NEXT: %3 = 
bitcast <4 x i32> %2 to <2 x i64> + // X64-NEXT: ret <2 x i64> %3 return _mm_maskz_shuffle_epi32(__U, __A, 2); } __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_shuffle_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_mask_shuffle_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %permil = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__W to <8 x i32> + // X64-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %3 = select <8 x i1> %2, <8 x i32> %permil, <8 x i32> %1 + // X64-NEXT: %4 = bitcast <8 x i32> %3 to <4 x i64> + // X64-NEXT: ret <4 x i64> %4 return _mm256_mask_shuffle_epi32(__W, __U, __A, 2); } __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_shuffle_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // X64-LABEL: test_mm256_maskz_shuffle_epi32 + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // X64-NEXT: %permil = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> + // X64-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %2 = select <8 x i1> %1, <8 x i32> %permil, <8 x i32> zeroinitializer + // X64-NEXT: %3 = bitcast <8 x i32> %2 to <4 x i64> + // X64-NEXT: ret <4 x i64> %3 return _mm256_maskz_shuffle_epi32(__U, __A, 2); } __m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_mov_pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_mask_mov_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %__A, <2 x double> %__W + // X64-NEXT: ret <2 x double> %1 return _mm_mask_mov_pd(__W, __U, __A); } __m128d test_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_maskz_mov_pd - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + // X64-LABEL: test_mm_maskz_mov_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> + // X64-NEXT: %1 = select <2 x i1> %extract.i, <2 x double> %__A, <2 x double> zeroinitializer + // X64-NEXT: ret <2 x double> %1 return _mm_maskz_mov_pd(__U, __A); } __m256d test_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_mask_mov_pd - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_mask_mov_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %__A, <4 x double> %__W + // X64-NEXT: ret <4 x double> %1 return _mm256_mask_mov_pd(__W, __U, __A); } __m256d test_mm256_maskz_mov_pd(__mmask8 __U, __m256d __A) { - // CHECK-LABEL: @test_mm256_maskz_mov_pd - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + // X64-LABEL: test_mm256_maskz_mov_pd + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: 
%extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x double> %__A, <4 x double> zeroinitializer + // X64-NEXT: ret <4 x double> %1 return _mm256_maskz_mov_pd(__U, __A); } __m128 test_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_mov_ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_mask_mov_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %__A, <4 x float> %__W + // X64-NEXT: ret <4 x float> %1 return _mm_mask_mov_ps(__W, __U, __A); } __m128 test_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_mov_ps - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // X64-LABEL: test_mm_maskz_mov_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + // X64-NEXT: %1 = select <4 x i1> %extract.i, <4 x float> %__A, <4 x float> zeroinitializer + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_mov_ps(__U, __A); } __m256 test_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_mov_ps - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_mask_mov_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %__A, <8 x float> %__W + // X64-NEXT: ret <8 x float> %1 return _mm256_mask_mov_ps(__W, __U, __A); } __m256 test_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_mov_ps - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // X64-LABEL: test_mm256_maskz_mov_ps + // X64: entry: + // X64-NEXT: %0 = bitcast i8 %__U to <8 x i1> + // X64-NEXT: %1 = select <8 x i1> %0, <8 x float> %__A, <8 x float> zeroinitializer + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_mov_ps(__U, __A); } __m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.128 + // X64-LABEL: test_mm_mask_cvtph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %0, <4 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %1 return _mm_mask_cvtph_ps(__W, __U, __A); } __m128 test_mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.128 + // X64-LABEL: test_mm_maskz_cvtph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %1 = tail call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %0, <4 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <4 x float> %1 return _mm_maskz_cvtph_ps(__U, __A); } __m256 test_mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.256 + // X64-LABEL: test_mm256_mask_cvtph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %0, <8 x float> %__W, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %1 return 
_mm256_mask_cvtph_ps(__W, __U, __A); } __m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtph_ps - // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.256 + // X64-LABEL: test_mm256_maskz_cvtph_ps + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // X64-NEXT: %1 = tail call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %0, <8 x float> zeroinitializer, i8 %__U) #9 + // X64-NEXT: ret <8 x float> %1 return _mm256_maskz_cvtph_ps(__U, __A); } __m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_cvtps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + // X64-LABEL: test_mm_mask_cvtps_ph + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %__A, i32 4, <8 x i16> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_cvtps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + // X64-LABEL: test_mm_maskz_cvtps_ph + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %__A, i32 4, <8 x i16> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm_maskz_cvtps_ph(__U, __A); } __m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_cvtps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + // X64-LABEL: test_mm256_mask_cvtps_ph + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %__A, i32 4, <8 x i16> %0, i8 %__U) #9 + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + // X64-LABEL: test_mm256_maskz_cvtps_ph + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %__A, i32 4, <8 x i16> zeroinitializer, i8 %__U) #9 + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvtps_ph(__U, __A); } __m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + // X64-LABEL: test_mm_mask_cvt_roundps_ph + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %__A, i32 4, <8 x i16> %0, i8 %__U) + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m128i test_mm_maskz_cvt_roundps_ph(__mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_maskz_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + // X64-LABEL: test_mm_maskz_cvt_roundps_ph + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %__A, i32 4, <8 x i16> zeroinitializer, i8 %__U) + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> 
%1 return _mm_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m128i test_mm256_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_mask_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + // X64-LABEL: test_mm256_mask_cvt_roundps_ph + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__W to <8 x i16> + // X64-NEXT: %1 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %__A, i32 4, <8 x i16> %0, i8 %__U) + // X64-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // X64-NEXT: ret <2 x i64> %2 return _mm256_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m128i test_mm256_maskz_cvt_roundps_ph(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm256_maskz_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + // X64-LABEL: test_mm256_maskz_cvt_roundps_ph + // X64: entry: + // X64-NEXT: %0 = tail call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %__A, i32 4, <8 x i16> zeroinitializer, i8 %__U) + // X64-NEXT: %1 = bitcast <8 x i16> %0 to <2 x i64> + // X64-NEXT: ret <2 x i64> %1 return _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epi32_mask - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpeq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp eq <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpeq_epi32_mask(__a, __b); } __mmask8 test_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epi32_mask - // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpeq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp eq <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpeq_epi32_mask(__u, __a, __b); } __mmask8 test_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epi64_mask - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpeq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpeq_epi64_mask(__u, __a, __b); } __mmask8 test_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epi64_mask - // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpeq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <2 x 
i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpeq_epi64_mask(__a, __b); } __mmask8 test_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epi32_mask - // CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpgt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp sgt <4 x i32> %0, %1 + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_cmpgt_epi32_mask(__a, __b); } __mmask8 test_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epi32_mask - // CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpgt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <2 x i64> %__a to <4 x i32> + // X64-NEXT: %1 = bitcast <2 x i64> %__b to <4 x i32> + // X64-NEXT: %2 = icmp sgt <4 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // X64-NEXT: %4 = and <4 x i1> %2, %extract + // X64-NEXT: %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %6 = bitcast <8 x i1> %5 to i8 + // X64-NEXT: ret i8 %6 return (__mmask8)_mm_mask_cmpgt_epi32_mask(__u, __a, __b); } __mmask8 test_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epi64_mask - // CHECK: icmp sgt <2 x i64> %{{.*}}, %{{.*}} - // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_mask_cmpgt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sgt <2 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + // X64-NEXT: %2 = and <2 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm_mask_cmpgt_epi64_mask(__u, __a, __b); } __mmask8 test_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epi64_mask - // CHECK: icmp sgt <2 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm_cmpgt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sgt <2 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm_cmpgt_epi64_mask(__a, __b); } __mmask8 test_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpeq_epi32_mask - // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpeq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp eq <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmpeq_epi32_mask(__a, __b); } __mmask8 test_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpeq_epi32_mask - // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // 
X64-LABEL: test_mm256_mask_cmpeq_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp eq <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmpeq_epi32_mask(__u, __a, __b); } __mmask8 test_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpeq_epi64_mask - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpeq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmpeq_epi64_mask(__u, __a, __b); } __mmask8 test_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpeq_epi64_mask - // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpeq_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp eq <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmpeq_epi64_mask(__a, __b); } __mmask8 test_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epi32_mask - // CHECK: icmp sgt <8 x i32> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpgt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp sgt <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // X64-NEXT: ret i8 %3 return (__mmask8)_mm256_cmpgt_epi32_mask(__a, __b); } __mmask8 test_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epi32_mask - // CHECK: icmp sgt <8 x i32> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpgt_epi32_mask + // X64: entry: + // X64-NEXT: %0 = bitcast <4 x i64> %__a to <8 x i32> + // X64-NEXT: %1 = bitcast <4 x i64> %__b to <8 x i32> + // X64-NEXT: %2 = icmp sgt <8 x i32> %0, %1 + // X64-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %4 = and <8 x i1> %2, %3 + // X64-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // X64-NEXT: ret i8 %5 return (__mmask8)_mm256_mask_cmpgt_epi32_mask(__u, __a, __b); } __mmask8 test_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epi64_mask - // CHECK: icmp sgt <4 x i64> %{{.*}}, %{{.*}} - // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_mask_cmpgt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sgt <4 x i64> %__a, %__b + // X64-NEXT: %1 = bitcast i8 %__u to <8 x i1> + // X64-NEXT: %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + // X64-NEXT: %2 = and <4 x i1> %0, %extract + // X64-NEXT: %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // X64-NEXT: ret i8 %4 return (__mmask8)_mm256_mask_cmpgt_epi64_mask(__u, __a, 
__b); } __mmask8 test_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epi64_mask - // CHECK: icmp sgt <4 x i64> %{{.*}}, %{{.*}} + // X64-LABEL: test_mm256_cmpgt_epi64_mask + // X64: entry: + // X64-NEXT: %0 = icmp sgt <4 x i64> %__a, %__b + // X64-NEXT: %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + // X64-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // X64-NEXT: ret i8 %2 return (__mmask8)_mm256_cmpgt_epi64_mask(__a, __b); } diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c --- a/clang/test/CodeGen/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/avx512vlbw-builtins.c @@ -1,3211 +1,9617 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s - +// RUN: %clang_cc1 -O1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=SIGNED-CHAR +// RUN: %clang_cc1 -O1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=SIGNED-CHAR +// RUN: %clang_cc1 -O1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=NO-SIGNED-CHAR +// RUN: %clang_cc1 -O1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=NO-SIGNED-CHAR #include __mmask32 test_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpeq_epi8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpeq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpeq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpeq_epi8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpeq_epi8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = 
bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpeq_epi8_mask(__u, __a, __b); } __mmask16 test_mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epi8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpeq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpeq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpeq_epi8_mask(__a, __b); } __mmask16 test_mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epi8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpeq_epi8_mask(__u, __a, __b); } __mmask16 test_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpeq_epi16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpeq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpeq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // 
NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpeq_epi16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpeq_epi16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpeq_epi16_mask(__u, __a, __b); } __mmask8 test_mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epi16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpeq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpeq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpeq_epi16_mask(__a, __b); } __mmask8 test_mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epi16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpeq_epi16_mask(__u, __a, __b); } __mmask32 
test_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epi8_mask - // CHECK: icmp sgt <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpgt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpgt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpgt_epi8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epi8_mask - // CHECK: icmp sgt <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpgt_epi8_mask(__u, __a, __b); } __mmask16 test_mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epi8_mask - // CHECK: icmp sgt <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpgt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpgt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpgt_epi8_mask(__a, __b); } __mmask16 test_mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epi8_mask - // CHECK: icmp sgt <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 
+ // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpgt_epi8_mask(__u, __a, __b); } __mmask16 test_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epi16_mask - // CHECK: icmp sgt <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpgt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpgt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpgt_epi16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epi16_mask - // CHECK: icmp sgt <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpgt_epi16_mask(__u, __a, __b); } __mmask8 test_mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epi16_mask - // CHECK: icmp sgt <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpgt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpgt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x 
i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpgt_epi16_mask(__a, __b); } __mmask8 test_mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epi16_mask - // CHECK: icmp sgt <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpgt_epi16_mask(__u, __a, __b); } __mmask16 test_mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epu8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpeq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpeq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpeq_epu8_mask(__a, __b); } __mmask16 test_mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epu8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // 
NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpeq_epu8_mask(__u, __a, __b); } __mmask8 test_mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpeq_epu16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpeq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpeq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpeq_epu16_mask(__a, __b); } __mmask8 test_mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpeq_epu16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpeq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpeq_epu16_mask(__u, __a, __b); } __mmask32 test_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpeq_epu8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpeq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpeq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpeq_epu8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpeq_epu8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> 
%__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpeq_epu8_mask(__u, __a, __b); } __mmask16 test_mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpeq_epu16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpeq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpeq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpeq_epu16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpeq_epu16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpeq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpeq_epu16_mask(__u, __a, __b); } __mmask16 test_mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epu8_mask - // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpgt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpgt_epu8_mask + 
// NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpgt_epu8_mask(__a, __b); } __mmask16 test_mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epu8_mask - // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpgt_epu8_mask(__u, __a, __b); } __mmask8 test_mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpgt_epu16_mask - // CHECK: icmp ugt <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpgt_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpgt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpgt_epu16_mask(__a, __b); } __mmask8 test_mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpgt_epu16_mask - // CHECK: icmp ugt <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpgt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, 
%3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpgt_epu16_mask(__u, __a, __b); } __mmask32 test_mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epu8_mask - // CHECK: icmp ugt <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpgt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpgt_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpgt_epu8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epu8_mask - // CHECK: icmp ugt <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpgt_epu8_mask(__u, __a, __b); } __mmask16 test_mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpgt_epu16_mask - // CHECK: icmp ugt <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpgt_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpgt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpgt_epu16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpgt_epu16_mask - // CHECK: icmp ugt <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epu16_mask 
+ // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpgt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpgt_epu16_mask(__u, __a, __b); } __mmask16 test_mm_cmpge_epi8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epi8_mask - // CHECK: icmp sge <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpge_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpge_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpge_epi8_mask(__a, __b); } __mmask16 test_mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epi8_mask - // CHECK: icmp sge <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpge_epi8_mask(__u, __a, __b); } __mmask16 test_mm_cmpge_epu8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epu8_mask - // CHECK: icmp uge <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpge_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> 
%2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpge_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpge_epu8_mask(__a, __b); } __mmask16 test_mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epu8_mask - // CHECK: icmp uge <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpge_epu8_mask(__u, __a, __b); } __mmask8 test_mm_cmpge_epi16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epi16_mask - // CHECK: icmp sge <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpge_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sge <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpge_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpge_epi16_mask(__a, __b); } __mmask8 test_mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epi16_mask - // CHECK: icmp sge <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sge <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <8 x i16> %0, %1 + // 
NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpge_epi16_mask(__u, __a, __b); } __mmask8 test_mm_cmpge_epu16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpge_epu16_mask - // CHECK: icmp uge <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpge_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp uge <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpge_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpge_epu16_mask(__a, __b); } __mmask8 test_mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpge_epu16_mask - // CHECK: icmp uge <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp uge <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpge_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpge_epu16_mask(__u, __a, __b); } __mmask32 test_mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epi8_mask - // CHECK: icmp sge <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpge_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sge <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpge_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpge_epi8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epi8_mask - // CHECK: icmp sge <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // 
SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sge <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpge_epi8_mask(__u, __a, __b); } __mmask32 test_mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epu8_mask - // CHECK: icmp uge <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpge_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp uge <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpge_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpge_epu8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epu8_mask - // CHECK: icmp uge <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp uge <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpge_epu8_mask(__u, __a, __b); } __mmask16 test_mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epi16_mask - // CHECK: icmp sge <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpge_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // 
SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpge_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpge_epi16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epi16_mask - // CHECK: icmp sge <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sge <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpge_epi16_mask(__u, __a, __b); } __mmask16 test_mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpge_epu16_mask - // CHECK: icmp uge <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpge_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpge_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpge_epu16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpge_epu16_mask - // CHECK: icmp uge <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpge_epu16_mask + // NO-SIGNED-CHAR: entry: + // 
NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp uge <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpge_epu16_mask(__u, __a, __b); } __mmask16 test_mm_cmple_epi8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epi8_mask - // CHECK: icmp sle <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmple_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmple_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmple_epi8_mask(__a, __b); } __mmask16 test_mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epi8_mask - // CHECK: icmp sle <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmple_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmple_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmple_epi8_mask(__u, __a, __b); } __mmask16 test_mm_cmple_epu8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epu8_mask - // CHECK: icmp ule <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmple_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmple_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmple_epu8_mask(__a, __b); } __mmask16 
test_mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epu8_mask - // CHECK: icmp ule <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmple_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmple_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmple_epu8_mask(__u, __a, __b); } __mmask8 test_mm_cmple_epi16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epi16_mask - // CHECK: icmp sle <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmple_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sle <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmple_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmple_epi16_mask(__a, __b); } __mmask8 test_mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epi16_mask - // CHECK: icmp sle <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmple_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sle <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmple_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmple_epi16_mask(__u, __a, __b); } __mmask8 test_mm_cmple_epu16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmple_epu16_mask - // CHECK: icmp ule <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmple_epu16_mask + // 
SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ule <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmple_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmple_epu16_mask(__a, __b); } __mmask8 test_mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmple_epu16_mask - // CHECK: icmp ule <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmple_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ule <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmple_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmple_epu16_mask(__u, __a, __b); } __mmask32 test_mm256_cmple_epi8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epi8_mask - // CHECK: icmp sle <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmple_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sle <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmple_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmple_epi8_mask(__a, __b); } __mmask32 test_mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epi8_mask - // CHECK: icmp sle <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sle <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // 
NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmple_epi8_mask(__u, __a, __b); } __mmask32 test_mm256_cmple_epu8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epu8_mask - // CHECK: icmp ule <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmple_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ule <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmple_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmple_epu8_mask(__a, __b); } __mmask32 test_mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epu8_mask - // CHECK: icmp ule <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ule <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmple_epu8_mask(__u, __a, __b); } __mmask16 test_mm256_cmple_epi16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epi16_mask - // CHECK: icmp sle <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmple_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmple_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> 
%2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmple_epi16_mask(__a, __b); } __mmask16 test_mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epi16_mask - // CHECK: icmp sle <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sle <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmple_epi16_mask(__u, __a, __b); } __mmask16 test_mm256_cmple_epu16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmple_epu16_mask - // CHECK: icmp ule <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmple_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmple_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmple_epu16_mask(__a, __b); } __mmask16 test_mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmple_epu16_mask - // CHECK: icmp ule <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmple_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ule <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmple_epu16_mask(__u, __a, __b); } __mmask16 
test_mm_cmplt_epi8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epi8_mask - // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmplt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmplt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmplt_epi8_mask(__a, __b); } __mmask16 test_mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epi8_mask - // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmplt_epi8_mask(__u, __a, __b); } __mmask16 test_mm_cmplt_epu8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epu8_mask - // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmplt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmplt_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmplt_epu8_mask(__a, __b); } __mmask16 test_mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epu8_mask - // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = 
bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmplt_epu8_mask(__u, __a, __b); } __mmask8 test_mm_cmplt_epi16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epi16_mask - // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmplt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmplt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmplt_epi16_mask(__a, __b); } __mmask8 test_mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epi16_mask - // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmplt_epi16_mask(__u, __a, __b); } __mmask8 test_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmplt_epu16_mask - // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmplt_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmplt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // 
NO-SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmplt_epu16_mask(__a, __b); } __mmask8 test_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmplt_epu16_mask - // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmplt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmplt_epu16_mask(__u, __a, __b); } __mmask32 test_mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epi8_mask - // CHECK: icmp slt <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmplt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmplt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmplt_epi8_mask(__a, __b); } __mmask32 test_mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epi8_mask - // CHECK: icmp slt <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return 
(__mmask32)_mm256_mask_cmplt_epi8_mask(__u, __a, __b); } __mmask32 test_mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epu8_mask - // CHECK: icmp ult <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmplt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmplt_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmplt_epu8_mask(__a, __b); } __mmask32 test_mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epu8_mask - // CHECK: icmp ult <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmplt_epu8_mask(__u, __a, __b); } __mmask16 test_mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epi16_mask - // CHECK: icmp slt <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmplt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmplt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmplt_epi16_mask(__a, __b); } __mmask16 test_mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epi16_mask - // CHECK: icmp slt <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // 
SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmplt_epi16_mask(__u, __a, __b); } __mmask16 test_mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmplt_epu16_mask - // CHECK: icmp ult <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmplt_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmplt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmplt_epu16_mask(__a, __b); } __mmask16 test_mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmplt_epu16_mask - // CHECK: icmp ult <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmplt_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmplt_epu16_mask(__u, __a, __b); } __mmask16 test_mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epi8_mask - // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpneq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret 
i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpneq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpneq_epi8_mask(__a, __b); } __mmask16 test_mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epi8_mask - // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpneq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpneq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpneq_epi8_mask(__u, __a, __b); } __mmask16 test_mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epu8_mask - // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpneq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpneq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmpneq_epu8_mask(__a, __b); } __mmask16 test_mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epu8_mask - // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpneq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpneq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = 
bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmpneq_epu8_mask(__u, __a, __b); } __mmask8 test_mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epi16_mask - // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpneq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpneq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpneq_epi16_mask(__a, __b); } __mmask8 test_mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epi16_mask - // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmpneq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpneq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpneq_epi16_mask(__u, __a, __b); } __mmask8 test_mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmpneq_epu16_mask - // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmpneq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmpneq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmpneq_epu16_mask(__a, __b); } __mmask8 test_mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmpneq_epu16_mask - // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: 
test_mm_mask_cmpneq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmpneq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmpneq_epu16_mask(__u, __a, __b); } __mmask32 test_mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epi8_mask - // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpneq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpneq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpneq_epi8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpneq_epi8_mask - // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpneq_epi8_mask(__u, __a, __b); } __mmask32 test_mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epu8_mask - // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpneq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> 
%0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpneq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmpneq_epu8_mask(__a, __b); } __mmask32 test_mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpneq_epu8_mask - // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmpneq_epu8_mask(__u, __a, __b); } __mmask16 test_mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epi16_mask - // CHECK: icmp ne <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpneq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpneq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpneq_epi16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpneq_epi16_mask - // CHECK: icmp ne <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x 
i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpneq_epi16_mask(__u, __a, __b); } __mmask16 test_mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmpneq_epu16_mask - // CHECK: icmp ne <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmpneq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmpneq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmpneq_epu16_mask(__a, __b); } __mmask16 test_mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmpneq_epu16_mask - // CHECK: icmp ne <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmpneq_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ne <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmpneq_epu16_mask(__u, __a, __b); } __mmask16 test_mm_cmp_epi8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_epi8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmp_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmp_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 0); } __mmask16 test_mm_mask_cmp_epi8_mask(__mmask16 
__u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_epi8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmp_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmp_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmp_epi8_mask(__u, __a, __b, 0); } __mmask16 test_mm_cmp_epu8_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_epu8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmp_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmp_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm_cmp_epu8_mask(__a, __b, 0); } __mmask16 test_mm_mask_cmp_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_epu8_mask - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmp_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmp_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm_mask_cmp_epu8_mask(__u, __a, __b, 0); } __mmask8 test_mm_cmp_epi16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_epi16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmp_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> 
%__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmp_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmp_epi16_mask(__a, __b, 0); } __mmask8 test_mm_mask_cmp_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_epi16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmp_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmp_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmp_epi16_mask(__u, __a, __b, 0); } __mmask8 test_mm_cmp_epu16_mask(__m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_cmp_epu16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_cmp_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // SIGNED-CHAR-NEXT: ret i8 %3 + // NO-SIGNED-CHAR-LABEL: test_mm_cmp_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i1> %2 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %3 return (__mmask8)_mm_cmp_epu16_mask(__a, __b, 0); } __mmask8 test_mm_mask_cmp_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { - // CHECK-LABEL: @test_mm_mask_cmp_epu16_mask - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cmp_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // SIGNED-CHAR-NEXT: ret i8 %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cmp_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__a to <8 x i16> + 
// NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__b to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__u to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <8 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i1> %4 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %5 return (__mmask8)_mm_mask_cmp_epu16_mask(__u, __a, __b, 0); } __mmask32 test_mm256_cmp_epi8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_epi8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmp_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmp_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmp_epi8_mask(__a, __b, 0); } __mmask32 test_mm256_mask_cmp_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmp_epi8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmp_epi8_mask(__u, __a, __b, 0); } __mmask32 test_mm256_cmp_epu8_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_epu8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmp_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // SIGNED-CHAR-NEXT: ret i32 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmp_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i1> %2 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %3 return (__mmask32)_mm256_cmp_epu8_mask(__a, __b, 0); } __mmask32 test_mm256_mask_cmp_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: 
@test_mm256_mask_cmp_epu8_mask - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epu8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // SIGNED-CHAR-NEXT: ret i32 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epu8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__u to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <32 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i1> %4 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %5 return (__mmask32)_mm256_mask_cmp_epu8_mask(__u, __a, __b, 0); } __mmask16 test_mm256_cmp_epi16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_epi16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmp_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmp_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmp_epi16_mask(__a, __b, 0); } __mmask16 test_mm256_mask_cmp_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmp_epi16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmp_epi16_mask(__u, __a, __b, 0); } __mmask16 test_mm256_cmp_epu16_mask(__m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_cmp_epu16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_cmp_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: 
%0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // SIGNED-CHAR-NEXT: ret i16 %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_cmp_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i1> %2 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %3 return (__mmask16)_mm256_cmp_epu16_mask(__a, __b, 0); } __mmask16 test_mm256_mask_cmp_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { - // CHECK-LABEL: @test_mm256_mask_cmp_epu16_mask - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epu16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // SIGNED-CHAR-NEXT: ret i16 %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cmp_epu16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__a to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__b to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp eq <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__u to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = and <16 x i1> %2, %3 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i1> %4 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %5 return (__mmask16)_mm256_mask_cmp_epu16_mask(__u, __a, __b, 0); } __m256i test_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ - //CHECK-LABEL: @test_mm256_mask_add_epi8 - //CHECK: add <32 x i8> %{{.*}}, %{{.*}} - //CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_add_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %add.i.i = add <32 x i8> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %add.i.i, <32 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_add_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %add.i.i = add <32 x i8> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %add.i.i, <32 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_add_epi8(__W, __U , __A, __B); } __m256i test_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_add_epi8 - //CHECK: add <32 x i8> %{{.*}}, %{{.*}} - //CHECK: select 
<32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_add_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %add.i.i = add <32 x i8> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %add.i.i, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_add_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %add.i.i = add <32 x i8> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %add.i.i, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_add_epi8(__U , __A, __B); } __m256i test_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_add_epi16 - //CHECK: add <16 x i16> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_add_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %add.i.i = add <16 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %add.i.i, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_add_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %add.i.i = add <16 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %add.i.i, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_add_epi16(__W, __U , __A, __B); } __m256i test_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_add_epi16 - //CHECK: add <16 x i16> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_add_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %add.i.i = add <16 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %add.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_add_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // 
NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %add.i.i = add <16 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %add.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_add_epi16(__U , __A, __B); } __m256i test_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_sub_epi8 - //CHECK: sub <32 x i8> %{{.*}}, %{{.*}} - //CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_sub_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %sub.i.i, <32 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_sub_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %sub.i.i, <32 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_sub_epi8(__W, __U , __A, __B); } __m256i test_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_sub_epi8 - //CHECK: sub <32 x i8> %{{.*}}, %{{.*}} - //CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_sub_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %sub.i.i, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_sub_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %sub.i.i, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_sub_epi8(__U , __A, __B); } __m256i test_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_sub_epi16 - //CHECK: sub <16 x i16> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_sub_epi16 + // 
SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %sub.i.i, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_sub_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %sub.i.i, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_sub_epi16(__W, __U , __A, __B); } __m256i test_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_sub_epi16 - //CHECK: sub <16 x i16> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_sub_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %sub.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_sub_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %sub.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_sub_epi16(__U , __A, __B); } __m128i test_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_add_epi8 - //CHECK: add <16 x i8> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_add_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %add.i.i = add <16 x i8> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %add.i.i, <16 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_add_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // 
NO-SIGNED-CHAR-NEXT: %add.i.i = add <16 x i8> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %add.i.i, <16 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_add_epi8(__W, __U , __A, __B); } __m128i test_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_add_epi8 - //CHECK: add <16 x i8> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_add_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %add.i.i = add <16 x i8> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %add.i.i, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_add_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %add.i.i = add <16 x i8> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %add.i.i, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_add_epi8(__U , __A, __B); } __m128i test_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_add_epi16 - //CHECK: add <8 x i16> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_add_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %add.i.i = add <8 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %add.i.i, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_add_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %add.i.i = add <8 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %add.i.i, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_add_epi16(__W, __U , __A, __B); } __m128i test_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_add_epi16 - //CHECK: add <8 x i16> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_add_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> 
+ // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %add.i.i = add <8 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %add.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_add_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %add.i.i = add <8 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %add.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_add_epi16(__U , __A, __B); } __m128i test_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_sub_epi8 - //CHECK: sub <16 x i8> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_sub_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %sub.i.i, <16 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_sub_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %sub.i.i, <16 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_sub_epi8(__W, __U , __A, __B); } __m128i test_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_sub_epi8 - //CHECK: sub <16 x i8> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_sub_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %sub.i.i, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_sub_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %sub.i.i, <16 x i8> 
zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_sub_epi8(__U , __A, __B); } __m128i test_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_sub_epi16 - //CHECK: sub <8 x i16> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_sub_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %sub.i.i, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_sub_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %sub.i.i, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_sub_epi16(__W, __U , __A, __B); } __m128i test_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_sub_epi16 - //CHECK: sub <8 x i16> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_sub_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %sub.i.i = sub <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %sub.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_sub_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %sub.i.i = sub <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %sub.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_sub_epi16(__U , __A, __B); } __m256i test_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_mask_mullo_epi16 - //CHECK: mul <16 x i16> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_mullo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %mul.i.i = mul <16 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 
%__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %mul.i.i, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_mullo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %mul.i.i = mul <16 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %mul.i.i, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_mullo_epi16(__W, __U , __A, __B); } __m256i test_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { - //CHECK-LABEL: @test_mm256_maskz_mullo_epi16 - //CHECK: mul <16 x i16> %{{.*}}, %{{.*}} - //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_mullo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %mul.i.i = mul <16 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %mul.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_mullo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %mul.i.i = mul <16 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %mul.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_mullo_epi16(__U , __A, __B); } __m128i test_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_mask_mullo_epi16 - //CHECK: mul <8 x i16> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_mullo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %mul.i.i = mul <8 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %mul.i.i, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_mullo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %mul.i.i = mul <8 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %mul.i.i, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = 
bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_mullo_epi16(__W, __U , __A, __B); } __m128i test_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { - //CHECK-LABEL: @test_mm_maskz_mullo_epi16 - //CHECK: mul <8 x i16> %{{.*}}, %{{.*}} - //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_mullo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %mul.i.i = mul <8 x i16> %1, %0 + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %mul.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_mullo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %mul.i.i = mul <8 x i16> %1, %0 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %mul.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_mullo_epi16(__U , __A, __B); } __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) { - // CHECK-LABEL: @test_mm_mask_blend_epi8 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_blend_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_blend_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_blend_epi8(__U,__A,__W); } __m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) { - // CHECK-LABEL: @test_mm256_mask_blend_epi8 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_blend_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_blend_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x 
i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_blend_epi8(__U,__A,__W); } __m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) { - // CHECK-LABEL: @test_mm_mask_blend_epi16 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_blend_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_blend_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_blend_epi16(__U,__A,__W); } __m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { - // CHECK-LABEL: @test_mm256_mask_blend_epi16 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_blend_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_blend_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_blend_epi16(__U,__A,__W); } __m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_abs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = sub <16 x i8> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // 
NO-SIGNED-CHAR-LABEL: test_mm_mask_abs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = sub <16 x i8> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %0 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_abs_epi8(__W,__U,__A); } __m128i test_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_abs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = sub <16 x i8> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_abs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = sub <16 x i8> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %0 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_abs_epi8(__U,__A); } __m256i test_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_abs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = sub <32 x i8> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_abs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to 
<32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = sub <32 x i8> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %0 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_abs_epi8(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_abs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = sub <32 x i8> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_abs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = sub <32 x i8> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %0 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_abs_epi8(__U,__A); } __m128i test_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_abs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = sub <8 x i16> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_abs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = sub <8 x i16> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> 
%0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_abs_epi16(__W,__U,__A); } __m128i test_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_abs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = sub <8 x i16> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_abs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = sub <8 x i16> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_abs_epi16(__U,__A); } __m256i test_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_abs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = sub <16 x i16> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_abs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = sub <16 x i16> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0 + // 
NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_abs_epi16(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_abs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = sub <16 x i16> zeroinitializer, %0 + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_abs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = sub <16 x i16> zeroinitializer, %0 + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_abs_epi16(__U,__A); } __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_packs_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %0, <4 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_packs_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %0, <4 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_packs_epi32(__M,__A,__B); } __m128i test_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 
- // CHECK-LABEL: @test_mm_mask_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_packs_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %0, <4 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_packs_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %0, <4 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_packs_epi32(__W,__M,__A,__B); } __m256i test_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_packs_epi32 - // CHECK: @llvm.x86.avx2.packssdw - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_packs_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %0, <8 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_packs_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %0, <8 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_packs_epi32(__M,__A,__B); } __m256i test_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_packs_epi32 - // CHECK: @llvm.x86.avx2.packssdw - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_packs_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %0, <8 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 
%__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_packs_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %0, <8 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_packs_epi32(__W,__M,__A,__B); } __m128i test_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_packs_epi16 - // CHECK: @llvm.x86.sse2.packsswb - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_packs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_packs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_packs_epi16(__M,__A,__B); } __m128i test_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_packs_epi16 - // CHECK: @llvm.x86.sse2.packsswb - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_packs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_packs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to 
<16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_packs_epi16(__W,__M,__A,__B); } __m256i test_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_packs_epi16 - // CHECK: @llvm.x86.avx2.packsswb - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_packs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_packs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_packs_epi16(__M,__A,__B); } __m256i test_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_packs_epi16 - // CHECK: @llvm.x86.avx2.packsswb - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_packs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_packs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_packs_epi16(__W,__M,__A,__B); } __m128i test_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_packus_epi32 - // CHECK: @llvm.x86.sse41.packusdw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // 
SIGNED-CHAR-LABEL: test_mm_mask_packus_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %0, <4 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_packus_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %0, <4 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_packus_epi32(__W,__M,__A,__B); } __m128i test_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_packus_epi32 - // CHECK: @llvm.x86.sse41.packusdw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_packus_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %0, <4 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_packus_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %0, <4 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_packus_epi32(__M,__A,__B); } __m256i test_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_packus_epi32 - // CHECK: @llvm.x86.avx2.packusdw - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_packus_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %0, <8 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: 
test_mm256_maskz_packus_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %0, <8 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_packus_epi32(__M,__A,__B); } __m256i test_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_packus_epi32 - // CHECK: @llvm.x86.avx2.packusdw - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_packus_epi32 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %0, <8 x i32> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_packus_epi32 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %0, <8 x i32> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_packus_epi32(__W,__M,__A,__B); } __m128i test_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_packus_epi16 - // CHECK: @llvm.x86.sse2.packuswb - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_packus_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_packus_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x 
i64> %5 return _mm_maskz_packus_epi16(__M,__A,__B); } __m128i test_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_packus_epi16 - // CHECK: @llvm.x86.sse2.packuswb - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_packus_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_packus_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_packus_epi16(__W,__M,__A,__B); } __m256i test_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_packus_epi16 - // CHECK: @llvm.x86.avx2.packuswb - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_packus_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_packus_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_packus_epi16(__M,__A,__B); } __m256i test_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_packus_epi16 - // CHECK: @llvm.x86.avx2.packuswb - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_packus_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> 
@llvm.x86.avx2.packuswb(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_packus_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_packus_epi16(__W,__M,__A,__B); } __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_adds_epi8 - // CHECK: @llvm.sadd.sat.v16i8 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_adds_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_adds_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_adds_epi8(__W,__U,__A,__B); } __m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_adds_epi8 - // CHECK: @llvm.sadd.sat.v16i8 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_adds_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_adds_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x 
i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_adds_epi8(__U,__A,__B); } __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_adds_epi8 - // CHECK: @llvm.sadd.sat.v32i8 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_adds_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_adds_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_adds_epi8(__W,__U,__A,__B); } __m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_adds_epi8 - // CHECK: @llvm.sadd.sat.v32i8 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_adds_epi8(__U,__A,__B); } __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_adds_epi16 - // CHECK: @llvm.sadd.sat.v8i16 - // CHECK: select <8 x i1> %{{.*}}, 
<8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_adds_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_adds_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_adds_epi16(__W,__U,__A,__B); } __m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_adds_epi16 - // CHECK: @llvm.sadd.sat.v8i16 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_adds_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_adds_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_adds_epi16(__U,__A,__B); } __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_adds_epi16 - // CHECK: @llvm.sadd.sat.v16i16 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_adds_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: 
ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_adds_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_adds_epi16(__W,__U,__A,__B); } __m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_adds_epi16 - // CHECK: @llvm.sadd.sat.v16i16 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_adds_epi16(__U,__A,__B); } __m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_adds_epu8 - // CHECK-NOT: @llvm.x86.sse2.paddus.b - // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_adds_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_adds_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 
= select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_adds_epu8(__W,__U,__A,__B); } __m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_adds_epu8 - // CHECK-NOT: @llvm.x86.sse2.paddus.b - // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_adds_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_adds_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_adds_epu8(__U,__A,__B); } __m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_adds_epu8 - // CHECK-NOT: @llvm.x86.avx2.paddus.b - // CHECK: call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_adds_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_adds_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_adds_epu8(__W,__U,__A,__B); } __m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_adds_epu8 - // CHECK-NOT: @llvm.x86.avx2.paddus.b - // CHECK: call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x 
i8> %{{.*}}) - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_adds_epu8(__U,__A,__B); } __m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_adds_epu16 - // CHECK-NOT: @llvm.x86.sse2.paddus.w - // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_adds_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_adds_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_adds_epu16(__W,__U,__A,__B); } __m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_adds_epu16 - // CHECK-NOT: @llvm.x86.sse2.paddus.w - // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_adds_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + 
// SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_adds_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_adds_epu16(__U,__A,__B); } __m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_adds_epu16 - // CHECK-NOT: @llvm.x86.avx2.paddus.w - // CHECK: call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_adds_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_adds_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_adds_epu16(__W,__U,__A,__B); } __m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_adds_epu16 - // CHECK-NOT: @llvm.x86.avx2.paddus.w - // CHECK: call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_adds_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x 
i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_adds_epu16(__U,__A,__B); } __m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_avg_epu8 - // CHECK: @llvm.x86.sse2.pavg.b - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_avg_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_avg_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_avg_epu8(__W,__U,__A,__B); } __m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_avg_epu8 - // CHECK: @llvm.x86.sse2.pavg.b - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_avg_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_avg_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_avg_epu8(__U,__A,__B); } __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_avg_epu8 - // CHECK: @llvm.x86.avx2.pavg.b - // CHECK: select <32 x i1> 
%{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_avg_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_avg_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_avg_epu8(__W,__U,__A,__B); } __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_avg_epu8 - // CHECK: @llvm.x86.avx2.pavg.b - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_avg_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_avg_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_avg_epu8(__U,__A,__B); } __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_avg_epu16 - // CHECK: @llvm.x86.sse2.pavg.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_avg_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // 
SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_avg_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_avg_epu16(__W,__U,__A,__B); } __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_avg_epu16 - // CHECK: @llvm.x86.sse2.pavg.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_avg_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_avg_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_avg_epu16(__U,__A,__B); } __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_avg_epu16 - // CHECK: @llvm.x86.avx2.pavg.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_avg_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_avg_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to 
<4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_avg_epu16(__W,__U,__A,__B); } __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_avg_epu16 - // CHECK: @llvm.x86.avx2.pavg.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_avg_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_avg_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_avg_epu16(__U,__A,__B); } __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_max_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_max_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_max_epi8(__M,__A,__B); } __m128i test_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_max_epi8 + // SIGNED-CHAR: entry: + // 
SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_max_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_max_epi8(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_max_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_max_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_max_epi8(__M,__A,__B); } __m256i test_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epi8 - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_max_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // 
SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_max_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_max_epi8(__W,__M,__A,__B); } __m128i test_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_max_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_max_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_max_epi16(__M,__A,__B); } __m128i test_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_max_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> 
%7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_max_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_max_epi16(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_max_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_max_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_max_epi16(__M,__A,__B); } __m256i test_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epi16 - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_max_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_max_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + 
// NO-SIGNED-CHAR-NEXT: %2 = icmp sgt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_max_epi16(__W,__M,__A,__B); } __m128i test_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_max_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_max_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_max_epu8(__M,__A,__B); } __m128i test_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_max_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_max_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // 
NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_max_epu8(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_max_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_max_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_max_epu8(__M,__A,__B); } __m256i test_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epu8 - // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_max_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_max_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_max_epu8(__W,__M,__A,__B); } __m128i 
test_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_max_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_max_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_max_epu16(__M,__A,__B); } __m128i test_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_max_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_max_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_max_epu16(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x 
i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_max_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_max_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_max_epu16(__M,__A,__B); } __m256i test_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_max_epu16 - // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_max_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_max_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ugt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_max_epu16(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_min_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = 
bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_min_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_min_epi8(__M,__A,__B); } __m128i test_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_min_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_min_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_min_epi8(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_min_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x 
i8> %3, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_min_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_min_epi8(__M,__A,__B); } __m256i test_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epi8 - // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_min_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_min_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_min_epi8(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_min_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_min_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A 
to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_min_epi16(__M,__A,__B); } __m128i test_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_min_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_min_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_min_epi16(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_min_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_min_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 
%__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_min_epi16(__M,__A,__B); } __m256i test_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epi16 - // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_min_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_min_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp slt <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_min_epi16(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_min_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_min_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return 
_mm_maskz_min_epu8(__M,__A,__B); } __m128i test_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_min_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_min_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i8> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_min_epu8(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_min_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_min_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_min_epu8(__M,__A,__B); } __m256i test_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epu8 - // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select 
<32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] - // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_min_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_min_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <32 x i8> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <32 x i8> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_min_epu8(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_min_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_min_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_min_epu16(__M,__A,__B); } __m128i test_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] - // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_min_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: 
%1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_min_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <8 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <8 x i16> %6 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %7 return _mm_mask_min_epu16(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_min_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_min_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_min_epu16(__M,__A,__B); } __m256i test_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_min_epu16 - // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] - // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] - // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_min_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x 
i16> + // SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %7 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_min_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = icmp ult <16 x i16> %0, %1 + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %5 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + // NO-SIGNED-CHAR-NEXT: %7 = bitcast <16 x i16> %6 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %7 return _mm256_mask_min_epu16(__W,__M,__A,__B); } __m128i test_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_shuffle_epi8 - // CHECK: @llvm.x86.ssse3.pshuf.b - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_shuffle_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_shuffle_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_shuffle_epi8(__W,__U,__A,__B); } __m128i test_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_shuffle_epi8 - // CHECK: @llvm.x86.ssse3.pshuf.b - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_shuffle_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_shuffle_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: 
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_shuffle_epi8(__U,__A,__B); } __m256i test_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_shuffle_epi8 - // CHECK: @llvm.x86.avx2.pshuf.b - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_shuffle_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_shuffle_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_shuffle_epi8(__W,__U,__A,__B); } __m256i test_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_shuffle_epi8 - // CHECK: @llvm.x86.avx2.pshuf.b - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_shuffle_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_shuffle_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_shuffle_epi8(__U,__A,__B); } __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_subs_epi8 - // CHECK: @llvm.ssub.sat.v16i8 - // CHECK: 
select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_subs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_subs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_subs_epi8(__W,__U,__A,__B); } __m128i test_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_subs_epi8 - // CHECK: @llvm.ssub.sat.v16i8 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_subs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_subs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_subs_epi8(__U,__A,__B); } __m256i test_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_subs_epi8 - // CHECK: @llvm.ssub.sat.v32i8 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_subs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // 
SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_subs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_subs_epi8(__W,__U,__A,__B); } __m256i test_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_subs_epi8 - // CHECK: @llvm.ssub.sat.v32i8 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_subs_epi8(__U,__A,__B); } __m128i test_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_subs_epi16 - // CHECK: @llvm.ssub.sat.v8i16 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_subs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_subs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + 
// NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_subs_epi16(__W,__U,__A,__B); } __m128i test_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_subs_epi16 - // CHECK: @llvm.ssub.sat.v8i16 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_subs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_subs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_subs_epi16(__U,__A,__B); } __m256i test_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_subs_epi16 - // CHECK: @llvm.ssub.sat.v16i16 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_subs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_subs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_subs_epi16(__W,__U,__A,__B); } __m256i test_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_subs_epi16 - // CHECK: @llvm.ssub.sat.v16i16 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> 
@llvm.ssub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_subs_epi16(__U,__A,__B); } __m128i test_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_subs_epu8 - // CHECK-NOT: @llvm.x86.sse2.psubus.b - // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_subs_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_subs_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i8> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_subs_epu8(__W,__U,__A,__B); } __m128i test_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_subs_epu8 - // CHECK-NOT: @llvm.x86.sse2.psubus.b - // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_subs_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_subs_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> 
%__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_subs_epu8(__U,__A,__B); } __m256i test_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_subs_epu8 - // CHECK-NOT: @llvm.x86.avx2.psubus.b - // CHECK: call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_subs_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_subs_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <32 x i8> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_subs_epu8(__W,__U,__A,__B); } __m256i test_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_subs_epu8 - // CHECK-NOT: @llvm.x86.avx2.psubus.b - // CHECK: call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // 
NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_subs_epu8(__U,__A,__B); } __m128i test_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_subs_epu16 - // CHECK-NOT: @llvm.x86.sse2.psubus.w - // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_subs_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_subs_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_subs_epu16(__W,__U,__A,__B); } __m128i test_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_subs_epu16 - // CHECK-NOT: @llvm.x86.sse2.psubus.w - // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_subs_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_subs_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_subs_epu16(__U,__A,__B); } __m256i test_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_subs_epu16 - // CHECK-NOT: @llvm.x86.avx2.psubus.w - // CHECK: call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: 
test_mm256_mask_subs_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_subs_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_subs_epu16(__W,__U,__A,__B); } __m256i test_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_subs_epu16 - // CHECK-NOT: @llvm.x86.avx2.psubus.w - // CHECK: call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_subs_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_subs_epu16(__U,__A,__B); } __m128i test_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { - // CHECK-LABEL: @test_mm_mask2_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.128 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask2_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // 
SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask2_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask2_permutex2var_epi16(__A,__I,__U,__B); } __m256i test_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask2_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.256 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask2_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask2_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask2_permutex2var_epi16(__A,__I,__U,__B); } __m128i test_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.128 + // SIGNED-CHAR-LABEL: test_mm_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail 
call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_permutex2var_epi16(__A,__I,__B); } __m128i test_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.128 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_permutex2var_epi16(__A,__U,__I,__B); } __m128i test_mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.128 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__I to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_maskz_permutex2var_epi16(__U,__A,__I,__B); } __m256i 
test_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.256 + // SIGNED-CHAR-LABEL: test_mm256_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_permutex2var_epi16(__A,__I,__B); } __m256i test_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.256 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_permutex2var_epi16(__A,__U,__I,__B); } __m256i test_mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.vpermi2var.hi.256 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_permutex2var_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> 
@llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_permutex2var_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__I to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_maskz_permutex2var_epi16(__U,__A,__I,__B); } __m128i test_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_mask_maddubs_epi16 - // CHECK: @llvm.x86.ssse3.pmadd.ub.sw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_maddubs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_maddubs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_maddubs_epi16(__W, __U, __X, __Y); } __m128i test_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_maskz_maddubs_epi16 - // CHECK: @llvm.x86.ssse3.pmadd.ub.sw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_maddubs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %0, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_maddubs_epi16 + // NO-SIGNED-CHAR: entry: + // 
NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %0, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_maddubs_epi16(__U, __X, __Y); } __m256i test_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_maddubs_epi16 - // CHECK: @llvm.x86.avx2.pmadd.ub.sw - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_maddubs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_maddubs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_maddubs_epi16(__W, __U, __X, __Y); } __m256i test_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_maddubs_epi16 - // CHECK: @llvm.x86.avx2.pmadd.ub.sw - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_maddubs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %0, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_maddubs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %0, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return 
_mm256_maskz_maddubs_epi16(__U, __X, __Y); } __m128i test_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_madd_epi16 - // CHECK: @llvm.x86.sse2.pmadd.wd - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_madd_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // SIGNED-CHAR-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_madd_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <4 x i32> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> + // NO-SIGNED-CHAR-NEXT: %5 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <4 x i32> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_madd_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_madd_epi16 - // CHECK: @llvm.x86.sse2.pmadd.wd - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_madd_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // SIGNED-CHAR-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_madd_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + // NO-SIGNED-CHAR-NEXT: %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <4 x i32> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_madd_epi16(__U, __A, __B); } __m256i test_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_madd_epi16 - // CHECK: @llvm.x86.avx2.pmadd.wd - // CHECK: 
select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_madd_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_madd_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <8 x i32> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i32> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_madd_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_madd_epi16 - // CHECK: @llvm.x86.avx2.pmadd.wd - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_madd_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_madd_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i32> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_madd_epi16(__U, __A, __B); } __m128i test_mm_cvtsepi16_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtsepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.128 + // SIGNED-CHAR-LABEL: test_mm_cvtsepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 -1) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm_cvtsepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %0, <16 x i8> 
zeroinitializer, i8 -1) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm_cvtsepi16_epi8(__A); } __m128i test_mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtsepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.128 + // SIGNED-CHAR-LABEL: test_mm_mask_cvtsepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %0, <16 x i8> %1, i8 %__M) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtsepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %0, <16 x i8> %1, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_mask_cvtsepi16_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtsepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.128 + // SIGNED-CHAR-LABEL: test_mm_maskz_cvtsepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 %__M) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_cvtsepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtsepi16_epi8(__M, __A); } __m128i test_mm256_cvtsepi16_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtsepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.256 + // SIGNED-CHAR-LABEL: test_mm256_cvtsepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 -1) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_cvtsepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 -1) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm256_cvtsepi16_epi8(__A); } __m128i test_mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtsepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.256 + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtsepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // 
SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %0, <16 x i8> %1, i16 %__M) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtsepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %0, <16 x i8> %1, i16 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtsepi16_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtsepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.256 + // SIGNED-CHAR-LABEL: test_mm256_maskz_cvtsepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 %__M) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_cvtsepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtsepi16_epi8(__M, __A); } __m128i test_mm_cvtusepi16_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtusepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.wb.128 + // SIGNED-CHAR-LABEL: test_mm_cvtusepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 -1) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm_cvtusepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 -1) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm_cvtusepi16_epi8(__A); } __m128i test_mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtusepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.wb.128 + // SIGNED-CHAR-LABEL: test_mm_mask_cvtusepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %0, <16 x i8> %1, i8 %__M) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtusepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> 
%__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %0, <16 x i8> %1, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_mask_cvtusepi16_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtusepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.wb.128 + // SIGNED-CHAR-LABEL: test_mm_maskz_cvtusepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 %__M) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_cvtusepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtusepi16_epi8(__M, __A); } __m128i test_mm256_cvtusepi16_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtusepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.wb.256 + // SIGNED-CHAR-LABEL: test_mm256_cvtusepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 -1) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_cvtusepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 -1) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm256_cvtusepi16_epi8(__A); } __m128i test_mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtusepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.wb.256 + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtusepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %0, <16 x i8> %1, i16 %__M) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtusepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %0, <16 x i8> %1, i16 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm256_mask_cvtusepi16_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtusepi16_epi8 - // CHECK: 
@llvm.x86.avx512.mask.pmovus.wb.256 + // SIGNED-CHAR-LABEL: test_mm256_maskz_cvtusepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 %__M) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_cvtusepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %0, <16 x i8> zeroinitializer, i16 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm256_maskz_cvtusepi16_epi8(__M, __A); } __m128i test_mm_cvtepi16_epi8(__m128i __A) { - // CHECK-LABEL: @test_mm_cvtepi16_epi8 - // CHECK: trunc <8 x i16> %{{.*}} to <8 x i8> - // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> + // SIGNED-CHAR-LABEL: test_mm_cvtepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %conv.i = trunc <8 x i16> %0 to <8 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i = shufflevector <8 x i8> %conv.i, <8 x i8> zeroinitializer, <16 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <16 x i8> %shuffle.i to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %1 + // NO-SIGNED-CHAR-LABEL: test_mm_cvtepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %conv.i = trunc <8 x i16> %0 to <8 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i = shufflevector <8 x i8> %conv.i, <8 x i8> zeroinitializer, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <16 x i8> %shuffle.i to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %1 return _mm_cvtepi16_epi8(__A); } __m128i test_mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.wb.128 + // SIGNED-CHAR-LABEL: test_mm_mask_cvtepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %0, <16 x i8> %1, i8 %__M) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %0, <16 x i8> %1, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_mask_cvtepi16_epi8(__O, __M, __A); } __m128i test_mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.wb.128 + // SIGNED-CHAR-LABEL: test_mm_maskz_cvtepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 %__M) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to 
<2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_cvtepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %0, <16 x i8> zeroinitializer, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm_maskz_cvtepi16_epi8(__M, __A); } __m128i test_mm256_cvtepi16_epi8(__m256i __A) { - // CHECK-LABEL: @test_mm256_cvtepi16_epi8 - // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8> + // SIGNED-CHAR-LABEL: test_mm256_cvtepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %conv.i = trunc <16 x i16> %0 to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <16 x i8> %conv.i to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %1 + // NO-SIGNED-CHAR-LABEL: test_mm256_cvtepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %conv.i = trunc <16 x i16> %0 to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <16 x i8> %conv.i to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %1 return _mm256_cvtepi16_epi8(__A); } __m128i test_mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi16_epi8 - // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %conv.i.i = trunc <16 x i16> %0 to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = trunc <16 x i16> %0 to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm256_mask_cvtepi16_epi8(__O, __M, __A); } __m128i test_mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi16_epi8 - // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_cvtepi16_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %conv.i.i = trunc <16 x i16> %0 to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_cvtepi16_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // 
NO-SIGNED-CHAR-NEXT: %conv.i.i = trunc <16 x i16> %0 to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm256_maskz_cvtepi16_epi8(__M, __A); } __m128i test_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_mask_mulhrs_epi16 - // CHECK: @llvm.x86.ssse3.pmul.hr.sw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_mulhrs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_mulhrs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_mulhrs_epi16(__W, __U, __X, __Y); } __m128i test_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { - // CHECK-LABEL: @test_mm_maskz_mulhrs_epi16 - // CHECK: @llvm.x86.ssse3.pmul.hr.sw - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_mulhrs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_mulhrs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__X to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__Y to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_mulhrs_epi16(__U, __X, __Y); } __m256i test_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_mask_mulhrs_epi16 - // CHECK: @llvm.x86.avx2.pmul.hr.sw - // 
CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_mulhrs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_mulhrs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_mulhrs_epi16(__W, __U, __X, __Y); } __m256i test_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { - // CHECK-LABEL: @test_mm256_maskz_mulhrs_epi16 - // CHECK: @llvm.x86.avx2.pmul.hr.sw - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_mulhrs_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_mulhrs_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__X to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__Y to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_mulhrs_epi16(__U, __X, __Y); } __m128i test_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_mulhi_epu16 - // CHECK: @llvm.x86.sse2.pmulhu.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_mulhi_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: 
%5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_mulhi_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_mulhi_epu16(__W, __U, __A, __B); } __m128i test_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_mulhi_epu16 - // CHECK: @llvm.x86.sse2.pmulhu.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_mulhi_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_mulhi_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_mulhi_epu16(__U, __A, __B); } __m256i test_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_mulhi_epu16 - // CHECK: @llvm.x86.avx2.pmulhu.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_mulhi_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_mulhi_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast 
i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_mulhi_epu16(__W, __U, __A, __B); } __m256i test_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_mulhi_epu16 - // CHECK: @llvm.x86.avx2.pmulhu.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_mulhi_epu16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_mulhi_epu16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_mulhi_epu16(__U, __A, __B); } __m128i test_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_mulhi_epi16 - // CHECK: @llvm.x86.sse2.pmulh.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_mulhi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_mulhi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_mulhi_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_mulhi_epi16 - // CHECK: @llvm.x86.sse2.pmulh.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_mulhi_epi16 + // SIGNED-CHAR: entry: + // 
SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_mulhi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_mulhi_epi16(__U, __A, __B); } __m256i test_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_mulhi_epi16 - // CHECK: @llvm.x86.avx2.pmulh.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_mulhi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_mulhi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_mulhi_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_mulhi_epi16 - // CHECK: @llvm.x86.avx2.pmulh.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_mulhi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_mulhi_epi16 + // NO-SIGNED-CHAR: 
entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_mulhi_epi16(__U, __A, __B); } __m128i test_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpackhi_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_unpackhi_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %shuffle.i.i, <16 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_unpackhi_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %shuffle.i.i, <16 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_unpackhi_epi8(__W, __U, __A, __B); } __m128i test_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_unpackhi_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_unpackhi_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %shuffle.i.i, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_unpackhi_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %shuffle.i.i, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x 
i64> %4 return _mm_maskz_unpackhi_epi8(__U, __A, __B); } __m256i test_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpackhi_epi8 - // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_unpackhi_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %shuffle.i.i, <32 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_unpackhi_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %shuffle.i.i, <32 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_unpackhi_epi8(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi8 - // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_unpackhi_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %shuffle.i.i, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_unpackhi_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %shuffle.i.i, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_unpackhi_epi8(__U, __A, __B); } __m128i test_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpackhi_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_unpackhi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x 
i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %shuffle.i.i, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_unpackhi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %shuffle.i.i, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_unpackhi_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_unpackhi_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_unpackhi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %shuffle.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_unpackhi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %shuffle.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_unpackhi_epi16(__U, __A, __B); } __m256i test_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpackhi_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_unpackhi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %shuffle.i.i, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: 
test_mm256_mask_unpackhi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %shuffle.i.i, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_unpackhi_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_unpackhi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %shuffle.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_unpackhi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %shuffle.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_unpackhi_epi16(__U, __A, __B); } __m128i test_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpacklo_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_unpacklo_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %shuffle.i.i, <16 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_unpacklo_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> 
%3, <16 x i8> %shuffle.i.i, <16 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_unpacklo_epi8(__W, __U, __A, __B); } __m128i test_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_unpacklo_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_unpacklo_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %shuffle.i.i, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_unpacklo_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %shuffle.i.i, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_unpacklo_epi8(__U, __A, __B); } __m256i test_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpacklo_epi8 - // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_unpacklo_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %shuffle.i.i, <32 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_unpacklo_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %shuffle.i.i, <32 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_unpacklo_epi8(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi8 - // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // 
SIGNED-CHAR-LABEL: test_mm256_maskz_unpacklo_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %shuffle.i.i, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_unpacklo_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <32 x i8> %0, <32 x i8> %1, <32 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %shuffle.i.i, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_unpacklo_epi8(__U, __A, __B); } __m128i test_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_unpacklo_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_unpacklo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %shuffle.i.i, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_unpacklo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %shuffle.i.i, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_unpacklo_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_unpacklo_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_unpacklo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %shuffle.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + 
// SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_unpacklo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %shuffle.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_unpacklo_epi16(__U, __A, __B); } __m256i test_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_unpacklo_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_unpacklo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %shuffle.i.i, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_unpacklo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %shuffle.i.i, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_unpacklo_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_unpacklo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %shuffle.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_unpacklo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i16> %0, <16 x i16> %1, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = 
select <16 x i1> %2, <16 x i16> %shuffle.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_unpacklo_epi16(__U, __A, __B); } __m128i test_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepi8_epi16 - // CHECK: sext <8 x i8> %{{.*}} to <8 x i16> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cvtepi8_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %conv.i.i, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtepi8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %conv.i.i, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_cvtepi8_epi16(__W, __U, __A); } __m128i test_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepi8_epi16 - // CHECK: sext <8 x i8> %{{.*}} to <8 x i16> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_cvtepi8_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %conv.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_cvtepi8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = sext <8 x i8> %shuffle.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %conv.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_cvtepi8_epi16(__U, __A); } __m256i test_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepi8_epi16 - // CHECK: sext <16 x i8> %{{.*}} to <16 x i16> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtepi8_epi16 + // SIGNED-CHAR: entry: + // 
SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %conv.i.i, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtepi8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %conv.i.i, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_cvtepi8_epi16(__W, __U, __A); } __m256i test_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepi8_epi16 - // CHECK: sext <16 x i8> %{{.*}} to <16 x i16> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_cvtepi8_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %conv.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_cvtepi8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = sext <16 x i8> %0 to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %conv.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvtepi8_epi16(__U, __A); } __m128i test_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_cvtepu8_epi16 - // CHECK: zext <8 x i8> %{{.*}} to <8 x i16> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_cvtepu8_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %conv.i.i, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtepu8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // 
NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %conv.i.i, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_cvtepu8_epi16(__W, __U, __A); } __m128i test_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_cvtepu8_epi16 - // CHECK: zext <8 x i8> %{{.*}} to <8 x i16> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_cvtepu8_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %conv.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_cvtepu8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = zext <8 x i8> %shuffle.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %conv.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_cvtepu8_epi16(__U, __A); } __m256i test_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_cvtepu8_epi16 - // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtepu8_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %conv.i.i, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtepu8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %conv.i.i, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_cvtepu8_epi16(__W, __U, __A); } __m256i test_mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_cvtepu8_epi16 - // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_cvtepu8_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i16> 
+ // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %conv.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_cvtepu8_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %conv.i.i = zext <16 x i8> %0 to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %conv.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_cvtepu8_epi16(__U, __A); } __m256i test_mm256_sllv_epi16(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_sllv_epi16 - // CHECK: @llvm.x86.avx512.psllv.w.256( + // SIGNED-CHAR-LABEL: test_mm256_sllv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_sllv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_sllv_epi16(__A, __B); } __m256i test_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_sllv_epi16 - // CHECK: @llvm.x86.avx512.psllv.w.256( - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_sllv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_sllv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_sllv_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_sllv_epi16 - // CHECK: 
@llvm.x86.avx512.psllv.w.256( - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_sllv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_sllv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_sllv_epi16(__U, __A, __B); } __m128i test_mm_sllv_epi16(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_sllv_epi16 - // CHECK: @llvm.x86.avx512.psllv.w.128( + // SIGNED-CHAR-LABEL: test_mm_sllv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_sllv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_sllv_epi16(__A, __B); } __m128i test_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_sllv_epi16 - // CHECK: @llvm.x86.avx512.psllv.w.128( - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_sllv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_sllv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x 
i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_sllv_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_sllv_epi16 - // CHECK: @llvm.x86.avx512.psllv.w.128( - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_sllv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_sllv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_sllv_epi16(__U, __A, __B); } __m128i test_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_sll_epi16 - // CHECK: @llvm.x86.sse2.psll.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_sll_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_sll_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_sll_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_sll_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_sll_epi16 - // CHECK: @llvm.x86.sse2.psll.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_sll_epi16 + // SIGNED-CHAR: entry: + 
// SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_sll_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_sll_epi16(__U, __A, __B); } __m256i test_mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_sll_epi16 - // CHECK: @llvm.x86.avx2.psll.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_sll_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_sll_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_sll_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_sll_epi16 - // CHECK: @llvm.x86.avx2.psll.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_sll_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_sll_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 
= bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_sll_epi16(__U, __A, __B); } __m128i test_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_slli_epi16 - // CHECK: @llvm.x86.sse2.pslli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_slli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = shl <8 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_slli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = shl <8 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_slli_epi16(__W, __U, __A, 5); } __m128i test_mm_mask_slli_epi16_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_slli_epi16_2 - // CHECK: @llvm.x86.sse2.pslli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_slli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_slli_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_slli_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_slli_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_slli_epi16 - // CHECK: @llvm.x86.sse2.pslli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_slli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = 
shl <8 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_slli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = shl <8 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_slli_epi16(__U, __A, 5); } __m128i test_mm_maskz_slli_epi16_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_slli_epi16_2 - // CHECK: @llvm.x86.sse2.pslli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_slli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_slli_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_slli_epi16(__U, __A, __B); } __m256i test_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_slli_epi16 - // CHECK: @llvm.x86.avx2.pslli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_slli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = shl <16 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_slli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = shl <16 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_slli_epi16(__W, __U, __A, 5); } __m256i test_mm256_mask_slli_epi16_2(__m256i __W, __mmask16 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_slli_epi16_2 - // CHECK: @llvm.x86.avx2.pslli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> 
%{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_slli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_slli_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_slli_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_slli_epi16 - // CHECK: @llvm.x86.avx2.pslli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_slli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = shl <16 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_slli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = shl <16 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_slli_epi16(__U, __A, 5); } __m256i test_mm256_maskz_slli_epi16_2(__mmask16 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_maskz_slli_epi16_2 - // CHECK: @llvm.x86.avx2.pslli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_slli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_slli_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: 
%4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_slli_epi16(__U, __A, __B); } __m256i test_mm256_srlv_epi16(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_srlv_epi16 - // CHECK: @llvm.x86.avx512.psrlv.w.256( + // SIGNED-CHAR-LABEL: test_mm256_srlv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_srlv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_srlv_epi16(__A, __B); } __m256i test_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_srlv_epi16 - // CHECK: @llvm.x86.avx512.psrlv.w.256( - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_srlv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_srlv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_srlv_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_srlv_epi16 - // CHECK: @llvm.x86.avx512.psrlv.w.256( - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_srlv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x 
i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_srlv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_srlv_epi16(__U, __A, __B); } __m128i test_mm_srlv_epi16(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_srlv_epi16 - // CHECK: @llvm.x86.avx512.psrlv.w.128( + // SIGNED-CHAR-LABEL: test_mm_srlv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_srlv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_srlv_epi16(__A, __B); } __m128i test_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_srlv_epi16 - // CHECK: @llvm.x86.avx512.psrlv.w.128( - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_srlv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_srlv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_srlv_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_srlv_epi16 - // CHECK: @llvm.x86.avx512.psrlv.w.128( - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_srlv_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> 
%__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_srlv_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_srlv_epi16(__U, __A, __B); } __m128i test_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_srl_epi16 - // CHECK: @llvm.x86.sse2.psrl.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_srl_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_srl_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_srl_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_srl_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_srl_epi16 - // CHECK: @llvm.x86.sse2.psrl.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_srl_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_srl_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B 
to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_srl_epi16(__U, __A, __B); } __m256i test_mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_srl_epi16 - // CHECK: @llvm.x86.avx2.psrl.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_srl_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_srl_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_srl_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_srl_epi16 - // CHECK: @llvm.x86.avx2.psrl.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_srl_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_srl_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_srl_epi16(__U, __A, __B); } __m128i test_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_srli_epi16 - // CHECK: 
@llvm.x86.sse2.psrli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_srli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = lshr <8 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_srli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = lshr <8 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_srli_epi16(__W, __U, __A, 5); } __m128i test_mm_mask_srli_epi16_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_srli_epi16_2 - // CHECK: @llvm.x86.sse2.psrli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_srli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_srli_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_srli_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_srli_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_srli_epi16 - // CHECK: @llvm.x86.sse2.psrli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_srli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = lshr <8 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_srli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = lshr <8 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 
x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_srli_epi16(__U, __A, 5); } __m128i test_mm_maskz_srli_epi16_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_srli_epi16_2 - // CHECK: @llvm.x86.sse2.psrli.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_srli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_srli_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_srli_epi16(__U, __A, __B); } __m256i test_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_srli_epi16 - // CHECK: @llvm.x86.avx2.psrli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_srli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = lshr <16 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_srli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = lshr <16 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_srli_epi16(__W, __U, __A, 5); } __m256i test_mm256_mask_srli_epi16_2(__m256i __W, __mmask16 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_srli_epi16_2 - // CHECK: @llvm.x86.avx2.psrli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_srli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_srli_epi16_2 + // NO-SIGNED-CHAR: 
entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_srli_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_srli_epi16 - // CHECK: @llvm.x86.avx2.psrli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_srli_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = lshr <16 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_srli_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = lshr <16 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_srli_epi16(__U, __A, 5); } __m256i test_mm256_maskz_srli_epi16_2(__mmask16 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_maskz_srli_epi16_2 - // CHECK: @llvm.x86.avx2.psrli.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_srli_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_srli_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_srli_epi16(__U, __A, __B); } __m256i test_mm256_srav_epi16(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_srav_epi16 - // CHECK: @llvm.x86.avx512.psrav.w.256( + // SIGNED-CHAR-LABEL: test_mm256_srav_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + 
// SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_srav_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_srav_epi16(__A, __B); } __m256i test_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_srav_epi16 - // CHECK: @llvm.x86.avx512.psrav.w.256( - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_srav_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_srav_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_srav_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_srav_epi16 - // CHECK: @llvm.x86.avx512.psrav.w.256( - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_srav_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_srav_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_srav_epi16(__U, __A, 
__B); } __m128i test_mm_srav_epi16(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_srav_epi16 - // CHECK: @llvm.x86.avx512.psrav.w.128( + // SIGNED-CHAR-LABEL: test_mm_srav_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_srav_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_srav_epi16(__A, __B); } __m128i test_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_srav_epi16 - // CHECK: @llvm.x86.avx512.psrav.w.128( - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_srav_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_srav_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_srav_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_srav_epi16 - // CHECK: @llvm.x86.avx512.psrav.w.128( - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_srav_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_srav_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x 
i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_srav_epi16(__U, __A, __B); } __m128i test_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_sra_epi16 - // CHECK: @llvm.x86.sse2.psra.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_sra_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_sra_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_sra_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_sra_epi16 - // CHECK: @llvm.x86.sse2.psra.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_sra_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_sra_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_sra_epi16(__U, __A, __B); } __m256i test_mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_mask_sra_epi16 - // CHECK: @llvm.x86.avx2.psra.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> 
%{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_sra_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_sra_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_sra_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) { - // CHECK-LABEL: @test_mm256_maskz_sra_epi16 - // CHECK: @llvm.x86.avx2.psra.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_sra_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_sra_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_sra_epi16(__U, __A, __B); } __m128i test_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_srai_epi16 - // CHECK: @llvm.x86.sse2.psrai.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_srai_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = ashr <8 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_srai_epi16 + // NO-SIGNED-CHAR: entry: + 
// NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = ashr <8 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_srai_epi16(__W, __U, __A, 5); } __m128i test_mm_mask_srai_epi16_2(__m128i __W, __mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_mask_srai_epi16_2 - // CHECK: @llvm.x86.sse2.psrai.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_srai_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_srai_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_srai_epi16(__W, __U, __A, __B); } __m128i test_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_srai_epi16 - // CHECK: @llvm.x86.sse2.psrai.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_srai_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = ashr <8 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_srai_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = ashr <8 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_srai_epi16(__U, __A, 5); } __m128i test_mm_maskz_srai_epi16_2(__mmask8 __U, __m128i __A, int __B) { - // CHECK-LABEL: @test_mm_maskz_srai_epi16_2 - // CHECK: @llvm.x86.sse2.psrai.w - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_srai_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 
= select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_srai_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_srai_epi16(__U, __A, __B); } __m256i test_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_srai_epi16 - // CHECK: @llvm.x86.avx2.psrai.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_srai_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = ashr <16 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_srai_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = ashr <16 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_srai_epi16(__W, __U, __A, 5); } __m256i test_mm256_mask_srai_epi16_2(__m256i __W, __mmask16 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_mask_srai_epi16_2 - // CHECK: @llvm.x86.avx2.psrai.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_srai_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_srai_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_srai_epi16(__W, __U, __A, __B); } __m256i test_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: 
@test_mm256_maskz_srai_epi16 - // CHECK: @llvm.x86.avx2.psrai.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_srai_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = ashr <16 x i16> %0, + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_srai_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = ashr <16 x i16> %0, + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_srai_epi16(__U, __A, 5); } __m256i test_mm256_maskz_srai_epi16_2(__mmask16 __U, __m256i __A, int __B) { - // CHECK-LABEL: @test_mm256_maskz_srai_epi16_2 - // CHECK: @llvm.x86.avx2.psrai.w - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_srai_epi16_2 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %0, i32 %__B) #16 + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_srai_epi16_2 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %0, i32 %__B) #16 + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_srai_epi16(__U, __A, __B); } __m128i test_mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_mov_epi16 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_mov_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_mov_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_mov_epi16(__W, __U, __A); } __m128i 
test_mm_maskz_mov_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_mov_epi16 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_mov_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_mov_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_mov_epi16(__U, __A); } __m256i test_mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_mov_epi16 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_mov_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_mov_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_mov_epi16(__W, __U, __A); } __m256i test_mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_mov_epi16 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_mov_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_mov_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_mov_epi16(__U, __A); } __m128i test_mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_mov_epi8 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_mov_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast 
<2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_mov_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_mov_epi8(__W, __U, __A); } __m128i test_mm_maskz_mov_epi8(__mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_mov_epi8 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_mov_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_mov_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_mov_epi8(__U, __A); } __m256i test_mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_mov_epi8 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_mov_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_mov_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_mov_epi8(__W, __U, __A); } __m256i test_mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_mov_epi8 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_mov_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %2 = select <32 x i1> %1, <32 x i8> %0, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_mov_epi8 + // 
NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <32 x i1> %1, <32 x i8> %0, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_mov_epi8(__U, __A); } __m128i test_mm_loadu_epi16(void const *__P) { - // CHECK-LABEL: @test_mm_loadu_epi16 - // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm_loadu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <2 x i64>* + // SIGNED-CHAR-NEXT: %0 = load <2 x i64>, <2 x i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret <2 x i64> %0 + // NO-SIGNED-CHAR-LABEL: test_mm_loadu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <2 x i64>* + // NO-SIGNED-CHAR-NEXT: %0 = load <2 x i64>, <2 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %0 return _mm_loadu_epi16(__P); } __m128i test_mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_loadu_epi16 - // CHECK: @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i16> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm_mask_loadu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <8 x i16>* + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 1, <8 x i1> %2, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_loadu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <8 x i16>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 1, <8 x i1> %2, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_loadu_epi16(__W, __U, __P); } __m128i test_mm_maskz_loadu_epi16(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_loadu_epi16 - // CHECK: @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i16> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm_maskz_loadu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <8 x i16>* + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 1, <8 x i1> %1, <8 x i16> zeroinitializer) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_loadu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <8 x i16>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 1, <8 x i1> %1, <8 x i16> zeroinitializer) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_loadu_epi16(__U, __P); } __m256i 
test_mm256_loadu_epi16(void const *__P) { - // CHECK-LABEL: @test_mm256_loadu_epi16 - // CHECK: load <4 x i64>, <4 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm256_loadu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // SIGNED-CHAR-NEXT: %0 = load <4 x i64>, <4 x i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret <4 x i64> %0 + // NO-SIGNED-CHAR-LABEL: test_mm256_loadu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // NO-SIGNED-CHAR-NEXT: %0 = load <4 x i64>, <4 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %0 return _mm256_loadu_epi16(__P); } __m256i test_mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_loadu_epi16 - // CHECK: @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i16> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm256_mask_loadu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i16>* + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %0, i32 1, <16 x i1> %2, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_loadu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i16>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %0, i32 1, <16 x i1> %2, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_loadu_epi16(__W, __U, __P); } __m256i test_mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_loadu_epi16 - // CHECK: @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i16> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm256_maskz_loadu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i16>* + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %0, i32 1, <16 x i1> %1, <16 x i16> zeroinitializer) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_loadu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i16>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %0, i32 1, <16 x i1> %1, <16 x i16> zeroinitializer) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_loadu_epi16(__U, __P); } __m128i test_mm_loadu_epi8(void const *__P) { - // CHECK-LABEL: @test_mm_loadu_epi8 - // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm_loadu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <2 x i64>* + // SIGNED-CHAR-NEXT: %0 = load <2 x i64>, <2 x 
i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret <2 x i64> %0 + // NO-SIGNED-CHAR-LABEL: test_mm_loadu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <2 x i64>* + // NO-SIGNED-CHAR-NEXT: %0 = load <2 x i64>, <2 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %0 return _mm_loadu_epi8(__P); } __m128i test_mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm_mask_loadu_epi8 - // CHECK: @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i8> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm_mask_loadu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i8>* + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_loadu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i8>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_loadu_epi8(__W, __U, __P); } __m128i test_mm_maskz_loadu_epi8(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm_maskz_loadu_epi8 - // CHECK: @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i8> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm_maskz_loadu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i8>* + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_loadu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i8>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_loadu_epi8(__U, __P); } __m256i test_mm256_loadu_epi8(void const *__P) { - // CHECK-LABEL: @test_mm256_loadu_epi8 - // CHECK: load <4 x i64>, <4 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm256_loadu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // SIGNED-CHAR-NEXT: %0 = load <4 x i64>, <4 x i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret <4 x i64> %0 + // NO-SIGNED-CHAR-LABEL: test_mm256_loadu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // NO-SIGNED-CHAR-NEXT: %0 = load <4 x i64>, <4 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %0 return _mm256_loadu_epi8(__P); } __m256i 
test_mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_mask_loadu_epi8 - // CHECK: @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %{{.*}}, i32 1, <32 x i1> %{{.*}}, <32 x i8> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm256_mask_loadu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <32 x i8>* + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = tail call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %0, i32 1, <32 x i1> %2, <32 x i8> %1) #16 + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_loadu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <32 x i8>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = tail call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %0, i32 1, <32 x i1> %2, <32 x i8> %1) #16 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_loadu_epi8(__W, __U, __P); } __m256i test_mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P) { - // CHECK-LABEL: @test_mm256_maskz_loadu_epi8 - // CHECK: @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %{{.*}}, i32 1, <32 x i1> %{{.*}}, <32 x i8> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm256_maskz_loadu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <32 x i8>* + // SIGNED-CHAR-NEXT: %1 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %0, i32 1, <32 x i1> %1, <32 x i8> zeroinitializer) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_loadu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <32 x i8>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %0, i32 1, <32 x i1> %1, <32 x i8> zeroinitializer) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_loadu_epi8(__U, __P); } void test_mm_storeu_epi16(void *__p, __m128i __a) { - // check-label: @test_mm_storeu_epi16 - // check: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm_storeu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__p to <2 x i64>* + // SIGNED-CHAR-NEXT: store <2 x i64> %__a, <2 x i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm_storeu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__p to <2 x i64>* + // NO-SIGNED-CHAR-NEXT: store <2 x i64> %__a, <2 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret void return _mm_storeu_epi16(__p, __a); } void test_mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_storeu_epi16 - // CHECK: @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %{{.*}}, <8 x i16>* %{{.*}}, i32 1, <8 x i1> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm_mask_storeu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <8 x i16>* + // SIGNED-CHAR-NEXT: 
%1 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %0, i32 1, <8 x i1> %2) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm_mask_storeu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <8 x i16>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %0, i32 1, <8 x i1> %2) #16 + // NO-SIGNED-CHAR-NEXT: ret void return _mm_mask_storeu_epi16(__P, __U, __A); } void test_mm256_storeu_epi16(void *__P, __m256i __A) { - // CHECK-LABEL: @test_mm256_storeu_epi16 - // CHECK: store <4 x i64> %{{.*}}, <4 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm256_storeu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // SIGNED-CHAR-NEXT: store <4 x i64> %__A, <4 x i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm256_storeu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // NO-SIGNED-CHAR-NEXT: store <4 x i64> %__A, <4 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret void return _mm256_storeu_epi16(__P, __A); } void test_mm256_mask_storeu_epi16(void *__P, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_storeu_epi16 - // CHECK: @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %{{.*}}, <16 x i16>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm256_mask_storeu_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i16>* + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %1, <16 x i16>* %0, i32 1, <16 x i1> %2) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_storeu_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i16>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %1, <16 x i16>* %0, i32 1, <16 x i1> %2) #16 + // NO-SIGNED-CHAR-NEXT: ret void return _mm256_mask_storeu_epi16(__P, __U, __A); } void test_mm_storeu_epi8(void *__p, __m128i __a) { - // check-label: @test_mm_storeu_epi8 - // check: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__p to <2 x i64>* + // SIGNED-CHAR-NEXT: store <2 x i64> %__a, <2 x i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__p to <2 x i64>* + // NO-SIGNED-CHAR-NEXT: store <2 x i64> %__a, <2 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret void return _mm_storeu_epi8(__p, __a); } void test_mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_storeu_epi8 - // CHECK: @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %{{.*}}, <16 x i8>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) + // 
SIGNED-CHAR-LABEL: test_mm_mask_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i8>* + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %0, i32 1, <16 x i1> %2) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm_mask_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <16 x i8>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %0, i32 1, <16 x i1> %2) #16 + // NO-SIGNED-CHAR-NEXT: ret void return _mm_mask_storeu_epi8(__P, __U, __A); } void test_mm256_storeu_epi8(void *__P, __m256i __A) { - // CHECK-LABEL: @test_mm256_storeu_epi8 - // CHECK: store <4 x i64> %{{.*}}, <4 x i64>* %{{.*}}, align 1{{$}} + // SIGNED-CHAR-LABEL: test_mm256_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // SIGNED-CHAR-NEXT: store <4 x i64> %__A, <4 x i64>* %__v.i, align 1, !tbaa !2 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm256_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %__v.i = bitcast i8* %__P to <4 x i64>* + // NO-SIGNED-CHAR-NEXT: store <4 x i64> %__A, <4 x i64>* %__v.i, align 1, !tbaa !2 + // NO-SIGNED-CHAR-NEXT: ret void return _mm256_storeu_epi8(__P, __A); } void test_mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_storeu_epi8 - // CHECK: @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %{{.*}}, <32 x i8>* %{{.*}}, i32 1, <32 x i1> %{{.*}}) + // SIGNED-CHAR-LABEL: test_mm256_mask_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <32 x i8>* + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %1, <32 x i8>* %0, i32 1, <32 x i1> %2) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8* %__P to <32 x i8>* + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %1, <32 x i8>* %0, i32 1, <32 x i1> %2) #16 + // NO-SIGNED-CHAR-NEXT: ret void return _mm256_mask_storeu_epi8(__P, __U, __A); } __mmask16 test_mm_test_epi8_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_test_epi8_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_test_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // SIGNED-CHAR-NEXT: ret i16 %2 + // NO-SIGNED-CHAR-LABEL: test_mm_test_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i8> %0, 
zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %2 return _mm_test_epi8_mask(__A, __B); } __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_test_epi8_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_test_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // SIGNED-CHAR-NEXT: ret i16 %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_test_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %4 return _mm_mask_test_epi8_mask(__U, __A, __B); } __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_test_epi8_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_test_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp ne <32 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <32 x i1> %1 to i32 + // SIGNED-CHAR-NEXT: ret i32 %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_test_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <32 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <32 x i1> %1 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %2 return _mm256_test_epi8_mask(__A, __B); } __mmask32 test_mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_test_epi8_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_test_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp ne <32 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = and <32 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i1> %3 to i32 + // SIGNED-CHAR-NEXT: ret i32 %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_test_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <32 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <32 x i1> %1, %2 + // 
NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i1> %3 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %4 return _mm256_mask_test_epi8_mask(__U, __A, __B); } __mmask8 test_mm_test_epi16_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_test_epi16_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_test_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp ne <8 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // SIGNED-CHAR-NEXT: ret i8 %2 + // NO-SIGNED-CHAR-LABEL: test_mm_test_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <8 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %2 return _mm_test_epi16_mask(__A, __B); } __mmask8 test_mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_test_epi16_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_test_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp ne <8 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = and <8 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // SIGNED-CHAR-NEXT: ret i8 %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_test_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <8 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <8 x i1> %1, %2 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %4 return _mm_mask_test_epi16_mask(__U, __A, __B); } __mmask16 test_mm256_test_epi16_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_test_epi16_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_test_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // SIGNED-CHAR-NEXT: ret i16 %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_test_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %2 return _mm256_test_epi16_mask(__A, __B); } __mmask16 test_mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_test_epi16_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp ne 
<16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_test_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // SIGNED-CHAR-NEXT: ret i16 %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_test_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp ne <16 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %4 return _mm256_mask_test_epi16_mask(__U, __A, __B); } __mmask16 test_mm_testn_epi8_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_testn_epi8_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_testn_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // SIGNED-CHAR-NEXT: ret i16 %2 + // NO-SIGNED-CHAR-LABEL: test_mm_testn_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %2 return _mm_testn_epi8_mask(__A, __B); } __mmask16 test_mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_testn_epi8_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_testn_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // SIGNED-CHAR-NEXT: ret i16 %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_testn_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %4 return _mm_mask_testn_epi8_mask(__U, __A, __B); } __mmask32 test_mm256_testn_epi8_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_testn_epi8_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} + // 
SIGNED-CHAR-LABEL: test_mm256_testn_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp eq <32 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <32 x i1> %1 to i32 + // SIGNED-CHAR-NEXT: ret i32 %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_testn_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <32 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <32 x i1> %1 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %2 return _mm256_testn_epi8_mask(__A, __B); } __mmask32 test_mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_testn_epi8_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}} - // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_testn_epi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp eq <32 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = and <32 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i1> %3 to i32 + // SIGNED-CHAR-NEXT: ret i32 %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_testn_epi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <32 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <32 x i1> %1, %2 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i1> %3 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %4 return _mm256_mask_testn_epi8_mask(__U, __A, __B); } __mmask8 test_mm_testn_epi16_mask(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_testn_epi16_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_testn_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp eq <8 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // SIGNED-CHAR-NEXT: ret i8 %2 + // NO-SIGNED-CHAR-LABEL: test_mm_testn_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <8 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %2 return _mm_testn_epi16_mask(__A, __B); } __mmask8 test_mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_testn_epi16_mask - // CHECK: and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} - // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_testn_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp eq <8 x i16> %0, 
zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = and <8 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // SIGNED-CHAR-NEXT: ret i8 %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_testn_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <2 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <8 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <8 x i1> %1, %2 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i1> %3 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %4 return _mm_mask_testn_epi16_mask(__U, __A, __B); } __mmask16 test_mm256_testn_epi16_mask(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_testn_epi16_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_testn_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // SIGNED-CHAR-NEXT: ret i16 %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_testn_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %2 return _mm256_testn_epi16_mask(__A, __B); } __mmask16 test_mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_testn_epi16_mask - // CHECK: and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}} - // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_testn_epi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // SIGNED-CHAR-NEXT: ret i16 %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_testn_epi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %and.i.i = and <4 x i64> %__B, %__A + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp eq <16 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = and <16 x i1> %1, %2 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i1> %3 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %4 return _mm256_mask_testn_epi16_mask(__U, __A, __B); } __mmask16 test_mm_movepi8_mask(__m128i __A) { - // CHECK-LABEL: @test_mm_movepi8_mask - // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer - // CHECK: bitcast <16 x i1> [[CMP]] to i16 + // SIGNED-CHAR-LABEL: test_mm_movepi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp slt <16 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // SIGNED-CHAR-NEXT: ret i16 %2 + // 
NO-SIGNED-CHAR-LABEL: test_mm_movepi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp slt <16 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %2 return _mm_movepi8_mask(__A); } __mmask32 test_mm256_movepi8_mask(__m256i __A) { - // CHECK-LABEL: @test_mm256_movepi8_mask - // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer - // CHECK: bitcast <32 x i1> [[CMP]] to i32 + // SIGNED-CHAR-LABEL: test_mm256_movepi8_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = icmp slt <32 x i8> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <32 x i1> %1 to i32 + // SIGNED-CHAR-NEXT: ret i32 %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_movepi8_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = icmp slt <32 x i8> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <32 x i1> %1 to i32 + // NO-SIGNED-CHAR-NEXT: ret i32 %2 return _mm256_movepi8_mask(__A); } __m128i test_mm_movm_epi8(__mmask16 __A) { - // CHECK-LABEL: @test_mm_movm_epi8 - // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: %vpmovm2.i = sext <16 x i1> %{{.*}} to <16 x i8> + // SIGNED-CHAR-LABEL: test_mm_movm_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i16 %__A to <16 x i1> + // SIGNED-CHAR-NEXT: %vpmovm2.i = sext <16 x i1> %0 to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <16 x i8> %vpmovm2.i to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %1 + // NO-SIGNED-CHAR-LABEL: test_mm_movm_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i16 %__A to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %vpmovm2.i = sext <16 x i1> %0 to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <16 x i8> %vpmovm2.i to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %1 return _mm_movm_epi8(__A); } __m256i test_mm256_movm_epi8(__mmask32 __A) { - // CHECK-LABEL: @test_mm256_movm_epi8 - // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> - // CHECK: %vpmovm2.i = sext <32 x i1> %{{.*}} to <32 x i8> + // SIGNED-CHAR-LABEL: test_mm256_movm_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i32 %__A to <32 x i1> + // SIGNED-CHAR-NEXT: %vpmovm2.i = sext <32 x i1> %0 to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <32 x i8> %vpmovm2.i to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %1 + // NO-SIGNED-CHAR-LABEL: test_mm256_movm_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i32 %__A to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %vpmovm2.i = sext <32 x i1> %0 to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <32 x i8> %vpmovm2.i to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %1 return _mm256_movm_epi8(__A); } __m128i test_mm_movm_epi16(__mmask8 __A) { - // CHECK-LABEL: @test_mm_movm_epi16 - // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: %vpmovm2.i = sext <8 x i1> %{{.*}} to <8 x i16> + // SIGNED-CHAR-LABEL: test_mm_movm_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i8 %__A to <8 x i1> + // SIGNED-CHAR-NEXT: %vpmovm2.i = sext <8 x i1> %0 to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <8 x i16> %vpmovm2.i to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %1 + // NO-SIGNED-CHAR-LABEL: test_mm_movm_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8 %__A to <8 x i1> + // NO-SIGNED-CHAR-NEXT: 
%vpmovm2.i = sext <8 x i1> %0 to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <8 x i16> %vpmovm2.i to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %1 return _mm_movm_epi16(__A); } __m256i test_mm256_movm_epi16(__mmask16 __A) { - // CHECK-LABEL: @test_mm256_movm_epi16 - // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: %vpmovm2.i = sext <16 x i1> %{{.*}} to <16 x i16> + // SIGNED-CHAR-LABEL: test_mm256_movm_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast i16 %__A to <16 x i1> + // SIGNED-CHAR-NEXT: %vpmovm2.i = sext <16 x i1> %0 to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <16 x i16> %vpmovm2.i to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %1 + // NO-SIGNED-CHAR-LABEL: test_mm256_movm_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i16 %__A to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %vpmovm2.i = sext <16 x i1> %0 to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <16 x i16> %vpmovm2.i to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %1 return _mm256_movm_epi16(__A); } __m128i test_mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_broadcastb_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_broadcastb_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %shuffle.i.i, <16 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_broadcastb_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %shuffle.i.i, <16 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_broadcastb_epi8(__O, __M, __A); } __m128i test_mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_broadcastb_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_broadcastb_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %shuffle.i.i, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_broadcastb_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = 
shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %shuffle.i.i, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_broadcastb_epi8(__M, __A); } __m256i test_mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_broadcastb_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <32 x i32> zeroinitializer - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_broadcastb_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <32 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__O to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %shuffle.i.i, <32 x i8> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_broadcastb_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <32 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__O to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %shuffle.i.i, <32 x i8> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_broadcastb_epi8(__O, __M, __A); } __m256i test_mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcastb_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <32 x i32> zeroinitializer - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_broadcastb_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <32 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %2 = select <32 x i1> %1, <32 x i8> %shuffle.i.i, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_broadcastb_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <16 x i8> %0, <16 x i8> undef, <32 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <32 x i1> %1, <32 x i8> %shuffle.i.i, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_broadcastb_epi8(__M, __A); } __m128i test_mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_broadcastw_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x 
i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_broadcastw_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %shuffle.i.i, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_broadcastw_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__O to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %shuffle.i.i, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_broadcastw_epi16(__O, __M, __A); } __m128i test_mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_broadcastw_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> zeroinitializer - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_broadcastw_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %shuffle.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_broadcastw_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %shuffle.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_broadcastw_epi16(__M, __A); } __m256i test_mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_mask_broadcastw_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_broadcastw_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <16 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %shuffle.i.i, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_broadcastw_epi16 + // NO-SIGNED-CHAR: 
entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__O to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %shuffle.i.i, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_broadcastw_epi16(__O, __M, __A); } __m256i test_mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm256_maskz_broadcastw_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> zeroinitializer - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_broadcastw_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <16 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %shuffle.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_broadcastw_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %shuffle.i.i = shufflevector <8 x i16> %0, <8 x i16> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %shuffle.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_broadcastw_epi16(__M, __A); } __m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){ - // CHECK-LABEL: @test_mm_mask_set1_epi8 - // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_set1_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i8> %vecinit.i.i.i, <16 x i8> undef, <16 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = 
bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %vecinit15.i.i.i, <16 x i8> %0 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_set1_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i8> %vecinit.i.i.i, <16 x i8> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__O to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i8> %vecinit15.i.i.i, <16 x i8> %0 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_mask_set1_epi8(__O, __M, __A); } __m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){ - // CHECK-LABEL: @test_mm_maskz_set1_epi8 - // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_set1_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i8> %vecinit.i.i.i, <16 x i8> undef, <16 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %1 = select <16 x i1> %0, <16 x i8> %vecinit15.i.i.i, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_set1_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i8> %vecinit.i.i.i, <16 x i8> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %1 = select <16 x i1> %0, <16 x i8> %vecinit15.i.i.i, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i8> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm_maskz_set1_epi8( __M, __A); } __m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) { - // CHECK-LABEL: @test_mm256_mask_set1_epi8 - // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2 - // 
CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 23 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 24 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 25 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 26 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 27 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 28 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 29 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 30 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_set1_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit31.i.i.i = shufflevector <32 x i8> %vecinit.i.i.i, <32 x i8> undef, <32 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__O to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %2 = select <32 x i1> %1, <32 x i8> %vecinit31.i.i.i, <32 x i8> %0 + // SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_set1_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit31.i.i.i = shufflevector <32 x i8> %vecinit.i.i.i, <32 x i8> undef, <32 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__O to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <32 x i1> %1, <32 x i8> %vecinit31.i.i.i, <32 x i8> %0 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <32 x i8> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_mask_set1_epi8(__O, __M, __A); } __m256i test_mm256_maskz_set1_epi8( __mmask32 __M, char __A) { - // CHECK-LABEL: @test_mm256_maskz_set1_epi8 - // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3 
- // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 23 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 24 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 25 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 26 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 27 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 28 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 29 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 30 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31 - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_set1_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit31.i.i.i = shufflevector <32 x i8> %vecinit.i.i.i, <32 x i8> undef, <32 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %0 = bitcast i32 %__M to <32 x i1> + // SIGNED-CHAR-NEXT: %1 = select <32 x i1> %0, <32 x i8> %vecinit31.i.i.i, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <32 x i8> %1 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_set1_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit31.i.i.i = shufflevector <32 x i8> %vecinit.i.i.i, <32 x i8> undef, <32 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i32 %__M to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %1 = select <32 x i1> %0, <32 x i8> %vecinit31.i.i.i, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <32 x i8> %1 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %2 return _mm256_maskz_set1_epi8( __M, __A); } __m256i test_mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A) { - // CHECK-LABEL: @test_mm256_mask_set1_epi16 - // CHECK: insertelement <16 x i16> undef, i16 %{{.*}}, i32 0 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 1 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 2 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 3 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 4 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 5 - // CHECK: 
insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 6 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 7 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 8 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 9 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 10 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 11 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 12 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 13 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 14 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_set1_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i16> %vecinit.i.i.i, <16 x i16> undef, <16 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__O to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %vecinit15.i.i.i, <16 x i16> %0 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_set1_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i16> %vecinit.i.i.i, <16 x i16> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__O to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %vecinit15.i.i.i, <16 x i16> %0 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_mask_set1_epi16(__O, __M, __A); } __m256i test_mm256_maskz_set1_epi16(__mmask16 __M, short __A) { - // CHECK-LABEL: @test_mm256_maskz_set1_epi16 - // CHECK: insertelement <16 x i16> undef, i16 %{{.*}}, i32 0 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 1 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 2 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 3 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 4 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 5 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 6 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 7 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 8 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 9 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 10 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 11 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 12 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 13 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 14 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_set1_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i16> %vecinit.i.i.i, <16 x i16> undef, <16 x i32> zeroinitializer + // 
SIGNED-CHAR-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %1 = select <16 x i1> %0, <16 x i16> %vecinit15.i.i.i, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_set1_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit15.i.i.i = shufflevector <16 x i16> %vecinit.i.i.i, <16 x i16> undef, <16 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %1 = select <16 x i1> %0, <16 x i16> %vecinit15.i.i.i, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i16> %1 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %2 return _mm256_maskz_set1_epi16(__M, __A); } __m128i test_mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A) { - // CHECK-LABEL: @test_mm_mask_set1_epi16 - // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_set1_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit7.i.i.i = shufflevector <8 x i16> %vecinit.i.i.i, <8 x i16> undef, <8 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %vecinit7.i.i.i, <8 x i16> %0 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_set1_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit7.i.i.i = shufflevector <8 x i16> %vecinit.i.i.i, <8 x i16> undef, <8 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__O to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %vecinit7.i.i.i, <8 x i16> %0 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_mask_set1_epi16(__O, __M, __A); } __m128i test_mm_maskz_set1_epi16(__mmask8 __M, short __A) { - // CHECK-LABEL: @test_mm_maskz_set1_epi16 - // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 - // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // 
SIGNED-CHAR-LABEL: test_mm_maskz_set1_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0 + // SIGNED-CHAR-NEXT: %vecinit7.i.i.i = shufflevector <8 x i16> %vecinit.i.i.i, <8 x i16> undef, <8 x i32> zeroinitializer + // SIGNED-CHAR-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %1 = select <8 x i1> %0, <8 x i16> %vecinit7.i.i.i, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %2 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_set1_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %vecinit.i.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0 + // NO-SIGNED-CHAR-NEXT: %vecinit7.i.i.i = shufflevector <8 x i16> %vecinit.i.i.i, <8 x i16> undef, <8 x i32> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %0 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %1 = select <8 x i1> %0, <8 x i16> %vecinit7.i.i.i, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <8 x i16> %1 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %2 return _mm_maskz_set1_epi16(__M, __A); } __m128i test_mm_permutexvar_epi16(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_permutexvar_epi16 - // CHECK: @llvm.x86.avx512.permvar.hi.128 + // SIGNED-CHAR-LABEL: test_mm_permutexvar_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_permutexvar_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_permutexvar_epi16(__A, __B); } __m128i test_mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_permutexvar_epi16 - // CHECK: @llvm.x86.avx512.permvar.hi.128 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_permutexvar_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_permutexvar_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to 
<2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_permutexvar_epi16(__M, __A, __B); } __m128i test_mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_permutexvar_epi16 - // CHECK: @llvm.x86.avx512.permvar.hi.128 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_permutexvar_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__B to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %0, <8 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_permutexvar_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__B to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %0, <8 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__M to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_permutexvar_epi16(__W, __M, __A, __B); } __m256i test_mm256_permutexvar_epi16(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_permutexvar_epi16 - // CHECK: @llvm.x86.avx512.permvar.hi.256 + // SIGNED-CHAR-LABEL: test_mm256_permutexvar_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_permutexvar_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_permutexvar_epi16(__A, __B); } __m256i test_mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi16 - // CHECK: @llvm.x86.avx512.permvar.hi.256 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_permutexvar_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 
to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_permutexvar_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_permutexvar_epi16(__M, __A, __B); } __m256i test_mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_permutexvar_epi16 - // CHECK: @llvm.x86.avx512.permvar.hi.256 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_permutexvar_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__B to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %0, <16 x i16> %1) #16 + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_permutexvar_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__B to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %0, <16 x i16> %1) #16 + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__M to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_permutexvar_epi16(__W, __M, __A, __B); } __m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_alignr_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_alignr_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %palignr = shufflevector <16 x i8> %1, <16 x i8> %0, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <2 x i64> %__W to <16 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %palignr, <16 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_alignr_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %palignr = shufflevector <16 x i8> %1, <16 x i8> %0, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 
= bitcast <2 x i64> %__W to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i8> %palignr, <16 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i8> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2); } __m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_alignr_epi8 - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_alignr_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %palignr = shufflevector <16 x i8> %1, <16 x i8> %0, <16 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %palignr, <16 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_alignr_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %palignr = shufflevector <16 x i8> %1, <16 x i8> %0, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i8> %palignr, <16 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i8> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_maskz_alignr_epi8(__U, __A, __B, 2); } __m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_alignr_epi8 - // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_alignr_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %palignr = shufflevector <32 x i8> %1, <32 x i8> %0, <32 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %palignr, <32 x i8> %2 + // SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_alignr_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %palignr = shufflevector <32 x i8> %1, <32 x i8> %0, <32 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <4 x i64> %__W to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <32 x i1> %3, <32 x i8> %palignr, <32 x i8> %2 + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <32 x i8> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2); } __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_alignr_epi8 - // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x 
i32> - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_alignr_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %palignr = shufflevector <32 x i8> %1, <32 x i8> %0, <32 x i32> + // SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %palignr, <32 x i8> zeroinitializer + // SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_alignr_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %palignr = shufflevector <32 x i8> %1, <32 x i8> %0, <32 x i32> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i32 %__U to <32 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <32 x i1> %2, <32 x i8> %palignr, <32 x i8> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <32 x i8> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_maskz_alignr_epi8(__U, __A, __B, 2); } __m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_dbsad_epu8 - // CHECK: @llvm.x86.avx512.dbpsadbw.128 + // SIGNED-CHAR-LABEL: test_mm_dbsad_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %0, <16 x i8> %1, i32 170) + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_dbsad_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %0, <16 x i8> %1, i32 170) + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_dbsad_epu8(__A, __B, 170); } __m128i test_mm_mask_dbsad_epu8(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_mask_dbsad_epu8 - // CHECK: @llvm.x86.avx512.dbpsadbw.128 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_dbsad_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %0, <16 x i8> %1, i32 170) + // SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_dbsad_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %0, <16 x i8> %1, i32 170) + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <2 x i64> %__W to <8 x i16> + // 
NO-SIGNED-CHAR-NEXT: %4 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <8 x i16> %5 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %6 return _mm_mask_dbsad_epu8(__W, __U, __A, __B, 170); } __m128i test_mm_maskz_dbsad_epu8(__mmask8 __U, __m128i __A, __m128i __B) { - // CHECK-LABEL: @test_mm_maskz_dbsad_epu8 - // CHECK: @llvm.x86.avx512.dbpsadbw.128 - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_dbsad_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %0, <16 x i8> %1, i32 170) + // SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_dbsad_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__B to <16 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %0, <16 x i8> %1, i32 170) + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <8 x i16> %4 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %5 return _mm_maskz_dbsad_epu8(__U, __A, __B, 170); } __m256i test_mm256_dbsad_epu8(__m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_dbsad_epu8 - // CHECK: @llvm.x86.avx512.dbpsadbw.256 + // SIGNED-CHAR-LABEL: test_mm256_dbsad_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %0, <32 x i8> %1, i32 170) + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_dbsad_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %0, <32 x i8> %1, i32 170) + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_dbsad_epu8(__A, __B, 170); } __m256i test_mm256_mask_dbsad_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_mask_dbsad_epu8 - // CHECK: @llvm.x86.avx512.dbpsadbw.256 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_dbsad_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %0, <32 x i8> %1, i32 170) + // SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + 
// SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %6 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_dbsad_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %0, <32 x i8> %1, i32 170) + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %4 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 + // NO-SIGNED-CHAR-NEXT: %6 = bitcast <16 x i16> %5 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %6 return _mm256_mask_dbsad_epu8(__W, __U, __A, __B, 170); } __m256i test_mm256_maskz_dbsad_epu8(__mmask16 __U, __m256i __A, __m256i __B) { - // CHECK-LABEL: @test_mm256_maskz_dbsad_epu8 - // CHECK: @llvm.x86.avx512.dbpsadbw.256 - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_dbsad_epu8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %0, <32 x i8> %1, i32 170) + // SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %5 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_dbsad_epu8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__B to <32 x i8> + // NO-SIGNED-CHAR-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %0, <32 x i8> %1, i32 170) + // NO-SIGNED-CHAR-NEXT: %3 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %5 = bitcast <16 x i16> %4 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %5 return _mm256_maskz_dbsad_epu8(__U, __A, __B, 170); } __mmask8 test_mm_movepi16_mask(__m128i __A) { - // CHECK-LABEL: @test_mm_movepi16_mask - // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer - // CHECK: bitcast <8 x i1> [[CMP]] to i8 + // SIGNED-CHAR-LABEL: test_mm_movepi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp slt <8 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // SIGNED-CHAR-NEXT: ret i8 %2 + // NO-SIGNED-CHAR-LABEL: test_mm_movepi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp slt <8 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <8 x i1> %1 to i8 + // NO-SIGNED-CHAR-NEXT: ret i8 %2 return _mm_movepi16_mask(__A); } __mmask16 test_mm256_movepi16_mask(__m256i __A) { - // CHECK-LABEL: @test_mm256_movepi16_mask - // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer - // CHECK: bitcast <16 x i1> [[CMP]] to i16 + // SIGNED-CHAR-LABEL: test_mm256_movepi16_mask + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %1 = icmp slt <16 x i16> %0, zeroinitializer + // SIGNED-CHAR-NEXT: 
%2 = bitcast <16 x i1> %1 to i16 + // SIGNED-CHAR-NEXT: ret i16 %2 + // NO-SIGNED-CHAR-LABEL: test_mm256_movepi16_mask + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %1 = icmp slt <16 x i16> %0, zeroinitializer + // NO-SIGNED-CHAR-NEXT: %2 = bitcast <16 x i1> %1 to i16 + // NO-SIGNED-CHAR-NEXT: ret i16 %2 return _mm256_movepi16_mask(__A); } __m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_shufflehi_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_shufflehi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %pshufhw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %pshufhw, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_shufflehi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %pshufhw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %pshufhw, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_shufflehi_epi16(__W, __U, __A, 5); } __m128i test_mm_maskz_shufflehi_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_shufflehi_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_shufflehi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %pshufhw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %pshufhw, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_shufflehi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %pshufhw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %pshufhw, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_shufflehi_epi16(__U, __A, 5); } __m128i test_mm_mask_shufflelo_epi16(__m128i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_mask_shufflelo_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_mask_shufflelo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x 
i16> + // SIGNED-CHAR-NEXT: %pshuflw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %pshuflw, <8 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm_mask_shufflelo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %pshuflw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <2 x i64> %__W to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <8 x i1> %2, <8 x i16> %pshuflw, <8 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <8 x i16> %3 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %4 return _mm_mask_shufflelo_epi16(__W, __U, __A, 5); } __m128i test_mm_maskz_shufflelo_epi16(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm_maskz_shufflelo_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm_maskz_shufflelo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: %pshuflw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %pshuflw, <8 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // SIGNED-CHAR-NEXT: ret <2 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm_maskz_shufflelo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: %pshuflw = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i8 %__U to <8 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <8 x i1> %1, <8 x i16> %pshuflw, <8 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <8 x i16> %2 to <2 x i64> + // NO-SIGNED-CHAR-NEXT: ret <2 x i64> %3 return _mm_maskz_shufflelo_epi16(__U, __A, 5); } __m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_shufflehi_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_shufflehi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %pshufhw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %pshufhw, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_shufflehi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %pshufhw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: 
%3 = select <16 x i1> %2, <16 x i16> %pshufhw, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_shufflehi_epi16(__W, __U, __A, 5); } __m256i test_mm256_maskz_shufflehi_epi16(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_shufflehi_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_shufflehi_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %pshufhw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %pshufhw, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_shufflehi_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %pshufhw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %pshufhw, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_shufflehi_epi16(__U, __A, 5); } __m256i test_mm256_mask_shufflelo_epi16(__m256i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_mask_shufflelo_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_mask_shufflelo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %pshuflw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %pshuflw, <16 x i16> %1 + // SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %4 + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_shufflelo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %pshuflw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast <4 x i64> %__W to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %2 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %3 = select <16 x i1> %2, <16 x i16> %pshuflw, <16 x i16> %1 + // NO-SIGNED-CHAR-NEXT: %4 = bitcast <16 x i16> %3 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %4 return _mm256_mask_shufflelo_epi16(__W, __U, __A, 5); } __m256i test_mm256_maskz_shufflelo_epi16(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm256_maskz_shufflelo_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + // SIGNED-CHAR-LABEL: test_mm256_maskz_shufflelo_epi16 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: %pshuflw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // 
SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %pshuflw, <16 x i16> zeroinitializer + // SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // SIGNED-CHAR-NEXT: ret <4 x i64> %3 + // NO-SIGNED-CHAR-LABEL: test_mm256_maskz_shufflelo_epi16 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: %pshuflw = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> + // NO-SIGNED-CHAR-NEXT: %1 = bitcast i16 %__U to <16 x i1> + // NO-SIGNED-CHAR-NEXT: %2 = select <16 x i1> %1, <16 x i16> %pshuflw, <16 x i16> zeroinitializer + // NO-SIGNED-CHAR-NEXT: %3 = bitcast <16 x i16> %2 to <4 x i64> + // NO-SIGNED-CHAR-NEXT: ret <4 x i64> %3 return _mm256_maskz_shufflelo_epi16(__U, __A, 5); } void test_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL:@test_mm_mask_cvtepi16_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.128 - _mm_mask_cvtepi16_storeu_epi8 (__P, __M, __A); + // SIGNED-CHAR-LABEL: test_mm_mask_cvtepi16_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %__P, <8 x i16> %0, i8 %__M) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtepi16_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %__P, <8 x i16> %0, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: ret void + _mm_mask_cvtepi16_storeu_epi8(__P, __M, __A); } void test_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL:@test_mm_mask_cvtsepi16_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.128 + // SIGNED-CHAR-LABEL: test_mm_mask_cvtsepi16_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %__P, <8 x i16> %0, i8 %__M) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtsepi16_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %__P, <8 x i16> %0, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: ret void _mm_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A); } void test_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - // CHECK-LABEL:@test_mm_mask_cvtusepi16_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.128 + // SIGNED-CHAR-LABEL: test_mm_mask_cvtusepi16_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %__P, <8 x i16> %0, i8 %__M) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm_mask_cvtusepi16_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <2 x i64> %__A to <8 x i16> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %__P, <8 x i16> %0, i8 %__M) #16 + // NO-SIGNED-CHAR-NEXT: ret void _mm_mask_cvtusepi16_storeu_epi8 (__P, __M, __A); } void test_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { - // CHECK-LABEL:@test_mm256_mask_cvtusepi16_storeu_epi8 - // CHECK: 
@llvm.x86.avx512.mask.pmovus.wb.mem.256 + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtusepi16_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %__P, <16 x i16> %0, i16 %__M) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtusepi16_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %__P, <16 x i16> %0, i16 %__M) #16 + // NO-SIGNED-CHAR-NEXT: ret void _mm256_mask_cvtusepi16_storeu_epi8 ( __P, __M, __A); } void test_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { - // CHECK-LABEL:@test_mm256_mask_cvtepi16_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.256 + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtepi16_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %__P, <16 x i16> %0, i16 %__M) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtepi16_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %__P, <16 x i16> %0, i16 %__M) #16 + // NO-SIGNED-CHAR-NEXT: ret void _mm256_mask_cvtepi16_storeu_epi8 ( __P, __M, __A); } void test_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { - // CHECK-LABEL:@test_mm256_mask_cvtsepi16_storeu_epi8 - // CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.256 - _mm256_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A); + // SIGNED-CHAR-LABEL: test_mm256_mask_cvtsepi16_storeu_epi8 + // SIGNED-CHAR: entry: + // SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %__P, <16 x i16> %0, i16 %__M) #16 + // SIGNED-CHAR-NEXT: ret void + // NO-SIGNED-CHAR-LABEL: test_mm256_mask_cvtsepi16_storeu_epi8 + // NO-SIGNED-CHAR: entry: + // NO-SIGNED-CHAR-NEXT: %0 = bitcast <4 x i64> %__A to <16 x i16> + // NO-SIGNED-CHAR-NEXT: tail call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %__P, <16 x i16> %0, i16 %__M) #16 + // NO-SIGNED-CHAR-NEXT: ret void + _mm256_mask_cvtsepi16_storeu_epi8(__P, __M, __A); } diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEWPM // This is initially assumed convergent, but can be deduced to not require it. 
@@ -117,7 +118,12 @@
 // CHECK: [[for_body]]:
 // CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
 // CHECK-NOT: call spir_func void @nodupfun()
-// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+
+// The new PM produces slightly different IR for the loop than the legacy PM,
+// but the test still checks that the loop is not unrolled.
+// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+// CHECK-NEWPM: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]]
+// CHECK-NEWPM: [[for_body_crit_edge]]:
 void test_not_unroll() {
   for (int i = 0; i < 10; i++)