diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11664,7 +11664,7 @@
   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
 
-static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
+static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
                                     ArrayRef<Value *> Ops, bool IsSigned) {
   unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
   llvm::Type *Ty = Ops[1]->getType();
@@ -11676,6 +11676,7 @@
     Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
     Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
   } else {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
                    : CGF.Builder.CreateUIToFP(Ops[0], Ty);
   }
@@ -11684,8 +11685,9 @@
 }
 
 // Lowers X86 FMA intrinsics to IR.
-static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
-                             unsigned BuiltinID, bool IsAddSub) {
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                             ArrayRef<Value *> Ops, unsigned BuiltinID,
+                             bool IsAddSub) {
   bool Subtract = false;
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
@@ -11742,6 +11744,7 @@
     llvm::Type *Ty = A->getType();
     Function *FMA;
     if (CGF.Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
       FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
       Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
     } else {
@@ -11783,10 +11786,10 @@
   return Res;
 }
 
-static Value *
-EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
-                  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
-                  bool NegAcc = false) {
+static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                                MutableArrayRef<Value *> Ops, Value *Upper,
+                                bool ZeroMask = false, unsigned PTIdx = 0,
+                                bool NegAcc = false) {
   unsigned Rnd = 4;
   if (Ops.size() > 4)
     Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
@@ -11805,6 +11808,7 @@
     Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                  {Ops[0], Ops[1], Ops[2], Ops[4]});
   } else if (CGF.Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *FMA = CGF.CGM.getIntrinsic(
         Intrinsic::experimental_constrained_fma, Ops[0]->getType());
     Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
@@ -12142,8 +12146,9 @@
   // TODO: The builtins could be removed if the SSE header files used vector
   // extension comparisons directly (vector ordered/unordered may need
   // additional support via __builtin_isnan()).
-  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
-                                      bool IsSignaling) {
+  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
+                                         bool IsSignaling) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     Value *Cmp;
     if (IsSignaling)
       Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
@@ -12385,31 +12390,31 @@
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
 
   case X86::BI__builtin_ia32_vfmaddss3:
   case X86::BI__builtin_ia32_vfmaddsd3:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0]);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
   case X86::BI__builtin_ia32_vfmaddss:
   case X86::BI__builtin_ia32_vfmaddsd:
-    return EmitScalarFMAExpr(*this, Ops,
+    return EmitScalarFMAExpr(*this, E, Ops,
                              Constant::getNullValue(Ops[0]->getType()));
   case X86::BI__builtin_ia32_vfmaddss3_maskz:
   case X86::BI__builtin_ia32_vfmaddsd3_maskz:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
   case X86::BI__builtin_ia32_vfmaddss3_mask3:
   case X86::BI__builtin_ia32_vfmaddsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
   case X86::BI__builtin_ia32_vfmsubss3_mask3:
   case X86::BI__builtin_ia32_vfmsubsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
-                             /*NegAcc*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
+                             /*NegAcc*/ true);
   case X86::BI__builtin_ia32_vfmaddps:
   case X86::BI__builtin_ia32_vfmaddpd:
   case X86::BI__builtin_ia32_vfmaddps256:
@@ -12422,7 +12427,7 @@
   case X86::BI__builtin_ia32_vfmaddpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
   case X86::BI__builtin_ia32_vfmaddsubps512_mask:
   case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12431,7 +12436,7 @@
   case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
 
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
@@ -13577,6 +13582,7 @@
     Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, {A});
@@ -13600,6 +13606,7 @@
     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, A);
@@ -13629,6 +13636,7 @@
     }
   }
   if (Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                    Ops[0]->getType());
     return Builder.CreateConstrainedFPCall(F, Ops[0]);
@@ -14173,6 +14181,8 @@
   if (IsMaskFCmp) {
     // We ignore SAE if strict FP is disabled. We only keep precise
     // exception behavior under strict FP.
+    // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
+    // object will be required.
     unsigned NumElts =
         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
     Value *Cmp;
@@ -14225,8 +14235,10 @@
   case X86::BI__builtin_ia32_vcvtph2ps256:
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
+  }
 
   // AVX512 bf16 intrinsics
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
diff --git a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
--- a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
+++ b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
@@ -1,4 +1,9 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s
+
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
 
 #include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins-constrained.c b/clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
@@ -0,0 +1,269 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+// FIXME: Every instance of "fpexcept.maytrap" is wrong.
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
+
+
+#include <immintrin.h>
+
+__m512d test_mm512_cvtepi64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_cvtepi64_pd(__A);
+}
+
+__m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_mask_cvtepi64_pd(__W, __U, __A);
+}
+
+__m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_maskz_cvtepi64_pd(__U, __A);
+}
+
+__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_cvtepi64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_cvtepi64_ps(__A);
+}
+
+__m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_mask_cvtepi64_ps(__W, __U, __A);
+}
+
+__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_maskz_cvtepi64_ps(__U, __A);
+}
+
+__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepi64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_ps
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_cvtepu64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_cvtepu64_pd(__A);
+}
+
+__m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_mask_cvtepu64_pd(__W, __U, __A);
+}
+
+__m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
!"round.tonearest", metadata !"fpexcept.maytrap") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtuqq2pd + return _mm512_maskz_cvtepu64_pd(__U, __A); +} + +__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) { + // COMMON-LABEL: test_mm512_cvt_roundepu64_pd + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 + // CHECK-ASM: vcvtuqq2pd + return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvt_roundepu64_pd + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtuqq2pd + return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_pd + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtuqq2pd + return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 test_mm512_cvtepu64_ps(__m512i __A) { + // COMMON-LABEL: test_mm512_cvtepu64_ps + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CHECK-ASM: vcvtuqq2ps + return _mm512_cvtepu64_ps(__A); +} + +__m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvtepu64_ps + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_mask_cvtepu64_ps(__W, __U, __A); +} + +__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvtepu64_ps + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 
0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_maskz_cvtepu64_ps(__U, __A); +} + +__m256 test_mm512_cvt_roundepu64_ps(__m512i __A) { + // COMMON-LABEL: test_mm512_cvt_roundepu64_ps + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64 + // CHECK-ASM: vcvtuqq2ps + return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvt_roundepu64_ps + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_ps + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
+}
+
diff --git a/clang/test/CodeGen/X86/avx512f-builtins-constrained.c b/clang/test/CodeGen/X86/avx512f-builtins-constrained.c
--- a/clang/test/CodeGen/X86/avx512f-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins-constrained.c
@@ -1,10 +1,17 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/fma-builtins-constrained.c b/clang/test/CodeGen/X86/fma-builtins-constrained.c
--- a/clang/test/CodeGen/X86/fma-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/fma-builtins-constrained.c
@@ -1,8 +1,15 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=strict -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=maytrap -DSTRICT=1 -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=strict -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/sse-builtins-constrained.c b/clang/test/CodeGen/X86/sse-builtins-constrained.c
--- a/clang/test/CodeGen/X86/sse-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/sse-builtins-constrained.c
@@ -1,8 +1,15 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
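
Illustration (not part of the patch): a minimal sketch of the scoping behavior the updated tests pin down, assuming a file compiled with -ffp-exception-behavior=maytrap; the function name below is hypothetical, not taken from the test suite. With the CGFPOptionsRAII scopes above in place, a builtin lowered inside a float_control(except, on) region should emit a constrained intrinsic whose exception metadata comes from the AST ("fpexcept.strict"), not from the command-line default ("fpexcept.maytrap").

// Assumed to be compiled with: clang -cc1 ... -ffp-exception-behavior=maytrap
#include <immintrin.h>

#pragma float_control(except, on) // AST-level override of the maytrap default

__m128 sqrt_strict_example(__m128 a) { // hypothetical test function
  // Expected IR, roughly:
  //   call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
  //       <4 x float> %{{.*}},
  //       metadata !"round.tonearest", metadata !"fpexcept.strict")
  return _mm_sqrt_ps(a);
}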