Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -9152,44 +9152,7 @@ return Builder.CreateExtractValue(Call, 1); } - case X86::BI__builtin_ia32_cmpps128_mask: - case X86::BI__builtin_ia32_cmpps256_mask: - case X86::BI__builtin_ia32_cmpps512_mask: - case X86::BI__builtin_ia32_cmppd128_mask: - case X86::BI__builtin_ia32_cmppd256_mask: - case X86::BI__builtin_ia32_cmppd512_mask: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); - Value *MaskIn = Ops[3]; - Ops.erase(&Ops[3]); - - Intrinsic::ID ID; - switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpps128_mask: - ID = Intrinsic::x86_avx512_mask_cmp_ps_128; - break; - case X86::BI__builtin_ia32_cmpps256_mask: - ID = Intrinsic::x86_avx512_mask_cmp_ps_256; - break; - case X86::BI__builtin_ia32_cmpps512_mask: - ID = Intrinsic::x86_avx512_mask_cmp_ps_512; - break; - case X86::BI__builtin_ia32_cmppd128_mask: - ID = Intrinsic::x86_avx512_mask_cmp_pd_128; - break; - case X86::BI__builtin_ia32_cmppd256_mask: - ID = Intrinsic::x86_avx512_mask_cmp_pd_256; - break; - case X86::BI__builtin_ia32_cmppd512_mask: - ID = Intrinsic::x86_avx512_mask_cmp_pd_512; - break; - } - - Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); - return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn); - } - - // SSE packed comparison intrinsics + // packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpeqpd: return getVectorFCmpIR(CmpInst::FCMP_OEQ); @@ -9217,10 +9180,44 @@ case X86::BI__builtin_ia32_cmpps: case X86::BI__builtin_ia32_cmpps256: case X86::BI__builtin_ia32_cmppd: - case X86::BI__builtin_ia32_cmppd256: { + case X86::BI__builtin_ia32_cmppd256: + case X86::BI__builtin_ia32_cmpps128_mask: + case X86::BI__builtin_ia32_cmpps256_mask: + case X86::BI__builtin_ia32_cmpps512_mask: + case 
X86::BI__builtin_ia32_cmppd128_mask:
+  case X86::BI__builtin_ia32_cmppd256_mask:
+  case X86::BI__builtin_ia32_cmppd512_mask: {
+    bool ReturnsMask = false;
+    bool UsesNonDefaultRounding = false;
+    Value *MaskIn = nullptr;
+    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+
+    switch (BuiltinID) {
+    case X86::BI__builtin_ia32_cmpps512_mask:
+    case X86::BI__builtin_ia32_cmppd512_mask:
+      // these two builtins have a fifth argument specifying a rounding method
+      // thus these can only be lowered to fcmp if the rounding method
+      // is _MM_FROUND_CUR_DIRECTION
+      if (cast<llvm::ConstantInt>(Ops[4])->getZExtValue() != 4)
+        UsesNonDefaultRounding = true;
+      // FALLTHROUGH
+    case X86::BI__builtin_ia32_cmpps128_mask:
+    case X86::BI__builtin_ia32_cmpps256_mask:
+    case X86::BI__builtin_ia32_cmppd128_mask:
+    case X86::BI__builtin_ia32_cmppd256_mask:
+      ReturnsMask = true;
+      MaskIn = Ops[3];
+      Ops.erase(&Ops[3]);
+      break;
+    }
+
+    // The third argument is the comparison condition, an integer in the
+    // range [0, 31]
     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
-    // If this one of the SSE immediates, we can use native IR.
-    if (CC < 8) {
+
+    // If this is one of the vector immediates, we can use native IR.
+    // But only when -ffast-math is enabled
+    if (CC < 8 && !UsesNonDefaultRounding && getLangOpts().FastMath) {
       FCmpInst::Predicate Pred;
       switch (CC) {
       case 0: Pred = FCmpInst::FCMP_OEQ; break;
@@ -9232,7 +9229,30 @@
       case 6: Pred = FCmpInst::FCMP_UGT; break;
       case 7: Pred = FCmpInst::FCMP_ORD; break;
       }
-      return getVectorFCmpIR(Pred);
+
+      Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+
+      // Builtins with the _mask suffix return a bitmask; the others return
+      // a vector of integers of the same width as the input vectors.
+      if (ReturnsMask)
+        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+
+      llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
+      llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+      Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+      return Builder.CreateBitCast(Sext, FPVecTy);
+    }
+
+    // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
+    // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
+    if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
+      llvm::Type *ResultType = ConvertType(E->getType());
+
+      // NOTE(review): for the masked 128/256-bit variants this produces an
+      // all-ones i8 even though fewer than 8 compare lanes exist; confirm
+      // whether the unused upper mask bits must be zero instead.
+      Value *Constant = (CC == 0xf || CC == 0x1f) ?
+              llvm::Constant::getAllOnesValue(ResultType) :
+              llvm::Constant::getNullValue(ResultType);
+
+      return Constant; // Return constant mask, or constant vector
+    }
 
     // We can't handle 8-31 immediates with native IR, use the intrinsic.
@@ -9240,41 +9260,43 @@ Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpps128_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_128; + break; + case X86::BI__builtin_ia32_cmpps256_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_256; + break; + case X86::BI__builtin_ia32_cmpps512_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_512; + break; + case X86::BI__builtin_ia32_cmppd128_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_128; + break; + case X86::BI__builtin_ia32_cmppd256_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_256; + break; + case X86::BI__builtin_ia32_cmppd512_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_512; + break; case X86::BI__builtin_ia32_cmpps: ID = Intrinsic::x86_sse_cmp_ps; break; case X86::BI__builtin_ia32_cmpps256: - // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector - // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... - if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { - Value *Constant = (CC == 0xf || CC == 0x1f) ? - llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : - llvm::Constant::getNullValue(Builder.getInt32Ty()); - Value *Vec = Builder.CreateVectorSplat( - Ops[0]->getType()->getVectorNumElements(), Constant); - return Builder.CreateBitCast(Vec, Ops[0]->getType()); - } ID = Intrinsic::x86_avx_cmp_ps_256; break; case X86::BI__builtin_ia32_cmppd: ID = Intrinsic::x86_sse2_cmp_pd; break; case X86::BI__builtin_ia32_cmppd256: - // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector - // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... - if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { - Value *Constant = (CC == 0xf || CC == 0x1f) ? 
-          llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
-          llvm::Constant::getNullValue(Builder.getInt64Ty());
-      Value *Vec = Builder.CreateVectorSplat(
-          Ops[0]->getType()->getVectorNumElements(), Constant);
-      return Builder.CreateBitCast(Vec, Ops[0]->getType());
-      }
       ID = Intrinsic::x86_avx_cmp_pd_256;
       break;
     }
-    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+    Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+    if (ReturnsMask)
+      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+
+    return Cmp;
   }
 
   // SSE scalar comparison intrinsics
Index: test/CodeGen/avx-builtins.c
===================================================================
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -1475,3 +1475,51 @@
   // CHECK: store <4 x double> zeroinitializer, <4 x double>* %tmp, align 32
   return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
 }
+
+__m128 test_mm_cmp_ps_false_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: @test_mm_cmp_ps_false_oq
+  // CHECK: store <4 x float> zeroinitializer, <4 x float>* %tmp, align 16
+  return _mm_cmp_ps(a, b, _CMP_FALSE_OQ);
+}
+
+__m128d test_mm_cmp_pd_false_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: @test_mm_cmp_pd_false_oq
+  // CHECK: store <2 x double> zeroinitializer, <2 x double>* %tmp, align 16
+  return _mm_cmp_pd(a, b, _CMP_FALSE_OQ);
+}
+
+__m128 test_mm_cmp_ps_false_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: @test_mm_cmp_ps_false_os
+  // CHECK: store <4 x float> zeroinitializer, <4 x float>* %tmp, align 16
+  return _mm_cmp_ps(a, b, _CMP_FALSE_OS);
+}
+
+__m128d test_mm_cmp_pd_false_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: @test_mm_cmp_pd_false_os
+  // CHECK: store <2 x double> zeroinitializer, <2 x double>* %tmp, align 16
+  return _mm_cmp_pd(a, b, _CMP_FALSE_OS);
+}
Index: test/CodeGen/avx2-builtins-fast-math.c
===================================================================
--- /dev/null
+++ test/CodeGen/avx2-builtins-fast-math.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin 
-target-feature +avx2 -ffast-math -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -ffast-math -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+
+#include <immintrin.h>
+
+__m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
+  // CHECK-LABEL: test_mm256_i32gather_pd
+  // CHECK: [[CMP:%.*]] = fcmp fast oeq <4 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
+  // CHECK: call fast <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+  return _mm256_i32gather_pd(b, c, 2);
+}
+
+__m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
+  // CHECK-LABEL: test_mm256_i32gather_ps
+  // CHECK: [[CMP:%.*]] = fcmp fast oeq <8 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
+  // CHECK: call fast <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
+  return _mm256_i32gather_ps(b, c, 2);
+}
+
+__m128d test_mm_mask_i64gather_pd(__m128d a, double const *b, __m128i c, __m128d d) {
+  // CHECK-LABEL: test_mm_mask_i64gather_pd
+  // CHECK: call fast <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %{{.*}}, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_mask_i64gather_pd(a, b, c, d, 2);
+}
Index: test/CodeGen/avx2-builtins.c
===================================================================
--- test/CodeGen/avx2-builtins.c
+++ test/CodeGen/avx2-builtins.c
@@ -504,9 +504,7 @@
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_pd
-  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
-  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-  // CHECK-NEXT: 
[[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i32gather_pd(b, c, 2);
 }
@@ -534,9 +532,7 @@
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i32gather_ps
-  // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float>
-  // CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-  // CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
+  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 0)
   // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
   return _mm256_i32gather_ps(b, c, 2);
 }
@@ -612,9 +608,7 @@
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
-  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
-  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
Index: test/CodeGen/avx512f-builtins-fast-math.c
===================================================================
--- /dev/null
+++ test/CodeGen/avx512f-builtins-fast-math.c
@@ -0,0 +1,99 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -ffast-math -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__mmask8 test_mm512_cmpeq_pd_mask(__m512d a, __m512d b) {
+  // CHECK-LABEL: 
@test_mm512_cmpeq_pd_mask + // CHECK: fcmp fast oeq <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmpeq_pd_mask(a, b); +} + +__mmask8 test_mm512_cmpeq_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmpeq_ps_mask + // CHECK: fcmp fast oeq <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmpeq_ps_mask(a, b); +} + +__mmask8 test_mm512_cmple_pd_mask(__m512d a, __m512d b) { + // CHECK-LABEL: @test_mm512_cmple_pd_mask + // CHECK: fcmp fast ole <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmple_pd_mask(a, b); +} + +__mmask8 test_mm512_cmple_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmple_ps_mask + // CHECK: fcmp fast ole <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmple_ps_mask(a, b); +} + +__mmask8 test_mm512_cmplt_pd_mask(__m512d a, __m512d b) { + // CHECK-LABEL: @test_mm512_cmplt_pd_mask + // CHECK: fcmp fast olt <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmplt_pd_mask(a, b); +} + +__mmask8 test_mm512_cmplt_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmplt_ps_mask + // CHECK: fcmp fast olt <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmplt_ps_mask(a, b); +} + +__mmask8 test_mm512_cmpneq_pd_mask(__m512d a, __m512d b) { + // CHECK-LABEL: @test_mm512_cmpneq_pd_mask + // CHECK: fcmp fast une <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmpneq_pd_mask(a, b); +} + +__mmask8 test_mm512_cmpneq_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmpneq_ps_mask + // CHECK: fcmp fast une <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmpneq_ps_mask(a, b); +} + +__mmask8 test_mm512_cmpnle_pd_mask(__m512d a, __m512d b) { + // CHECK-LABEL: @test_mm512_cmpnle_pd_mask + // CHECK: fcmp fast ugt <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmpnle_pd_mask(a, b); +} + +__mmask8 test_mm512_cmpnle_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmpnle_ps_mask + // CHECK: fcmp fast ugt <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmpnle_ps_mask(a, b); +} + +__mmask8 test_mm512_cmpnlt_pd_mask(__m512d a, __m512d b) { + // 
CHECK-LABEL: @test_mm512_cmpnlt_pd_mask + // CHECK: fcmp fast uge <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmpnlt_pd_mask(a, b); +} + +__mmask8 test_mm512_cmpnlt_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmpnlt_ps_mask + // CHECK: fcmp fast uge <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmpnlt_ps_mask(a, b); +} + +__mmask8 test_mm512_cmpord_pd_mask(__m512d a, __m512d b) { + // CHECK-LABEL: @test_mm512_cmpord_pd_mask + // CHECK: fcmp fast ord <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmpord_pd_mask(a, b); +} + +__mmask8 test_mm512_cmpord_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmpord_ps_mask + // CHECK: fcmp fast ord <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmpord_ps_mask(a, b); +} + +__mmask8 test_mm512_cmpunord_pd_mask(__m512d a, __m512d b) { + // CHECK-LABEL: @test_mm512_cmpunord_pd_mask + // CHECK: fcmp fast uno <8 x double> %{{.*}}, %{{.*}} + return _mm512_cmpunord_pd_mask(a, b); +} + +__mmask8 test_mm512_cmpunord_ps_mask(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmpunord_ps_mask + // CHECK: fcmp fast uno <16 x float> %{{.*}}, %{{.*}} + return _mm512_cmpunord_ps_mask(a, b); +} Index: test/CodeGen/avx512f-builtins.c =================================================================== --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -1018,6 +1018,34 @@ return _mm512_cmp_ps_mask(a, b, 0); } +__mmask16 test_mm512_cmp_ps_mask_true_uq(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_uq + // CHECK-NOT: call + // CHECK: store i16 -1 + return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_UQ); +} + +__mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_us + // CHECK-NOT: call + // CHECK: store i16 -1 + return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_US); +} + +__mmask16 test_mm512_cmp_ps_mask_false_oq(__m512 a, __m512 b) { + // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_oq + // CHECK-NOT: call + // CHECK: store i16 0 + 
return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_false_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: store i16 0
+  return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OS);
+}
+
 __mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask
   // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
@@ -1044,6 +1072,34 @@
   return _mm512_cmp_pd_mask(a, b, 0);
 }
 
+__mmask8 test_mm512_cmp_pd_mask_true_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_uq
+  // CHECK-NOT: call
+  // CHECK: store i8 -1
+  return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_us
+  // CHECK-NOT: call
+  // CHECK: store i8 -1
+  return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_oq
+  // CHECK-NOT: call
+  // CHECK: store i8 0
+  return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_os
+  // CHECK-NOT: call
+  // CHECK: store i8 0
+  return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OS);
+}
+
 __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask
   // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
Index: test/CodeGen/avx512vl-builtins-fast-math.c
===================================================================
--- /dev/null
+++ test/CodeGen/avx512vl-builtins-fast-math.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -ffast-math -o - -Wall -Werror | FileCheck %s
+
+
+#include <immintrin.h>
+
+__mmask8 test_mm256_cmp_ps_mask(__m256 __A, 
__m256 __B) { + // CHECK-LABEL: @test_mm256_cmp_ps_mask + // CHECK: fcmp fast oeq <8 x float> %{{.*}}, %{{.*}} + return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0); +} + +__mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask + // CHECK: fcmp fast oeq <8 x float> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_ps_mask(m, __A, __B, 0); +} + +__mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_cmp_ps_mask + // CHECK: fcmp fast oeq <4 x float> %{{.*}}, %{{.*}} + return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0); +} + +__mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_cmp_ps_mask + // CHECK: fcmp fast oeq <4 x float> %{{.*}}, %{{.*}} + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> + // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_ps_mask(m, __A, __B, 0); +} + +__mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_cmp_pd_mask + // CHECK: fcmp fast oeq <4 x double> %{{.*}}, %{{.*}} + return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0); +} + +__mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask + // CHECK: fcmp fast oeq <4 x double> %{{.*}}, %{{.*}} + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> + // CHECK: and <4 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pd_mask(m, __A, __B, 0); +} + +__mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_cmp_pd_mask + // CHECK: fcmp fast oeq <2 x double> %{{.*}}, %{{.*}} + return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0); +} + +__mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_cmp_pd_mask + // CHECK: fcmp fast oeq <2 x double> %{{.*}}, %{{.*}} + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x 
i32> + // CHECK: and <2 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pd_mask(m, __A, __B, 0); +} Index: test/CodeGen/avx512vl-builtins.c =================================================================== --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -1077,6 +1077,34 @@ return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0); } +__mmask8 test_mm256_cmp_ps_mask_true_uq(__m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_uq + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ); +} + +__mmask8 test_mm256_cmp_ps_mask_true_us(__m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_us + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_US); +} + +__mmask8 test_mm256_cmp_ps_mask_false_oq(__m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_oq + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ); +} + +__mmask8 test_mm256_cmp_ps_mask_false_os(__m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_os + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OS); +} + __mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256 @@ -1090,6 +1118,34 @@ return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0); } +__mmask8 test_mm_cmp_ps_mask_true_uq(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_cmp_ps_mask_true_uq + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ); +} + +__mmask8 test_mm_cmp_ps_mask_true_us(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_cmp_ps_mask_true_us + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_US); +} + +__mmask8 
test_mm_cmp_ps_mask_false_oq(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_cmp_ps_mask_false_oq + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ); +} + +__mmask8 test_mm_cmp_ps_mask_false_os(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_cmp_ps_mask_false_os + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OS); +} + __mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_cmp_ps_mask // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128 @@ -1103,6 +1159,34 @@ return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0); } +__mmask8 test_mm256_cmp_pd_mask_true_uq(__m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_uq + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ); +} + +__mmask8 test_mm256_cmp_pd_mask_true_us(__m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_us + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_US); +} + +__mmask8 test_mm256_cmp_pd_mask_false_oq(__m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_oq + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ); +} + +__mmask8 test_mm256_cmp_pd_mask_false_os(__m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_os + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OS); +} + __mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256 @@ -1116,6 +1200,34 @@ return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0); } +__mmask8 test_mm_cmp_pd_mask_true_uq(__m128d __A, __m128d __B) { + // CHECK-LABEL: 
@test_mm_cmp_pd_mask_true_uq + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ); +} + +__mmask8 test_mm_cmp_pd_mask_true_us(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_cmp_pd_mask_true_us + // CHECK-NOT: call + // CHECK: store i8 -1 + return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_US); +} + +__mmask8 test_mm_cmp_pd_mask_false_oq(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_cmp_pd_mask_false_oq + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ); +} + +__mmask8 test_mm_cmp_pd_mask_false_os(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_cmp_pd_mask_false_os + // CHECK-NOT: call + // CHECK: store i8 0 + return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OS); +} + __mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_cmp_pd_mask // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128