diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -12264,32 +12264,46 @@ case X86::BI__builtin_ia32_cvtdq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2ps512_mask: - case X86::BI__builtin_ia32_cvtqq2pd512_mask: + case X86::BI__builtin_ia32_cvtqq2pd512_mask: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true); + } case X86::BI__builtin_ia32_cvtudq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2ps512_mask: - case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + case X86::BI__builtin_ia32_cvtuqq2pd512_mask: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false); + } case X86::BI__builtin_ia32_vfmaddss3: case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddss3_mask: - case X86::BI__builtin_ia32_vfmaddsd3_mask: + case X86::BI__builtin_ia32_vfmaddsd3_mask: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitScalarFMAExpr(*this, Ops, Ops[0]); + } case X86::BI__builtin_ia32_vfmaddss: - case X86::BI__builtin_ia32_vfmaddsd: + case X86::BI__builtin_ia32_vfmaddsd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitScalarFMAExpr(*this, Ops, Constant::getNullValue(Ops[0]->getType())); + } case X86::BI__builtin_ia32_vfmaddss3_maskz: - case X86::BI__builtin_ia32_vfmaddsd3_maskz: + case X86::BI__builtin_ia32_vfmaddsd3_maskz: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true); + } case X86::BI__builtin_ia32_vfmaddss3_mask3: - case X86::BI__builtin_ia32_vfmaddsd3_mask3: + case X86::BI__builtin_ia32_vfmaddsd3_mask3: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2); + } case X86::BI__builtin_ia32_vfmsubss3_mask3: - case X86::BI__builtin_ia32_vfmsubsd3_mask3: + 
case X86::BI__builtin_ia32_vfmsubsd3_mask3: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2, /*NegAcc*/true); + } case X86::BI__builtin_ia32_vfmaddps: case X86::BI__builtin_ia32_vfmaddpd: case X86::BI__builtin_ia32_vfmaddps256: @@ -12301,8 +12315,10 @@ case X86::BI__builtin_ia32_vfmaddpd512_mask: case X86::BI__builtin_ia32_vfmaddpd512_maskz: case X86::BI__builtin_ia32_vfmaddpd512_mask3: - case X86::BI__builtin_ia32_vfmsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubpd512_mask3: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false); + } case X86::BI__builtin_ia32_vfmaddsubps512_mask: case X86::BI__builtin_ia32_vfmaddsubps512_maskz: case X86::BI__builtin_ia32_vfmaddsubps512_mask3: @@ -12310,8 +12326,10 @@ case X86::BI__builtin_ia32_vfmaddsubpd512_mask: case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: - case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true); + } case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: @@ -13457,6 +13475,7 @@ Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); Function *F; if (Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, A->getType()); A = Builder.CreateConstrainedFPCall(F, {A}); @@ -13480,6 +13499,7 @@ Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); Function *F; if (Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, A->getType()); A = Builder.CreateConstrainedFPCall(F, A); @@ -13509,6 +13529,7 @@ } } if 
(Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, Ops[0]->getType()); return Builder.CreateConstrainedFPCall(F, Ops[0]); @@ -13917,29 +13938,45 @@ // packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpeqpd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false); + } case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmpltpd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true); + } case X86::BI__builtin_ia32_cmpleps: - case X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmplepd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true); + } case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpunordpd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false); + } case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpneqpd: + case X86::BI__builtin_ia32_cmpneqpd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false); + } case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnltpd: + case X86::BI__builtin_ia32_cmpnltpd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true); + } case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpnlepd: + case X86::BI__builtin_ia32_cmpnlepd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true); + } case 
X86::BI__builtin_ia32_cmpordps: - case X86::BI__builtin_ia32_cmpordpd: + case X86::BI__builtin_ia32_cmpordpd: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); + } case X86::BI__builtin_ia32_cmpps128_mask: case X86::BI__builtin_ia32_cmpps256_mask: case X86::BI__builtin_ia32_cmpps512_mask: @@ -14053,6 +14090,8 @@ if (IsMaskFCmp) { // We ignore SAE if strict FP is disabled. We only keep precise // exception behavior under strict FP. + // NOTE: If strict FP does ever go through here a CGFPOptionsRAII + // object will be required. unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); Value *Cmp; @@ -14063,6 +14102,7 @@ return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); } + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return getVectorFCmpIR(Pred, IsSignaling); } @@ -14105,8 +14145,10 @@ case X86::BI__builtin_ia32_vcvtph2ps256: case X86::BI__builtin_ia32_vcvtph2ps_mask: case X86::BI__builtin_ia32_vcvtph2ps256_mask: - case X86::BI__builtin_ia32_vcvtph2ps512_mask: + case X86::BI__builtin_ia32_vcvtph2ps512_mask: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType())); + } // AVX512 bf16 intrinsics case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { diff --git a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c --- a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c +++ b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c @@ -1,4 +1,9 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s + +// Test that 
the constrained intrinsics are picking up the exception +// metadata from the AST instead of the global default from the command line. + +#pragma float_control(except, on) #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins-constrained.c b/clang/test/CodeGen/X86/avx512dq-builtins-constrained.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/X86/avx512dq-builtins-constrained.c @@ -0,0 +1,269 @@ +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON + +// FIXME: Every instance of "fpexcept.maytrap" is wrong. +#ifdef STRICT +// Test that the constrained intrinsics are picking up the exception +// metadata from the AST instead of the global default from the command line.
+ +#pragma float_control(except, on) +#endif + + +#include + +__m512d test_mm512_cvtepi64_pd(__m512i __A) { + // COMMON-LABEL: test_mm512_cvtepi64_pd + // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double> + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CHECK-ASM: vcvtqq2pd + return _mm512_cvtepi64_pd(__A); +} + +__m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvtepi64_pd + // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double> + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtqq2pd + return _mm512_mask_cvtepi64_pd(__W, __U, __A); +} + +__m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvtepi64_pd + // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double> + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtqq2pd + return _mm512_maskz_cvtepi64_pd(__U, __A); +} + +__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) { + // COMMON-LABEL: test_mm512_cvt_roundepi64_pd + // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64 + // CHECK-ASM: vcvtqq2pd + return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_pd + // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> 
%{{.*}} + // CHECK-ASM: vcvtqq2pd + return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_pd + // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtqq2pd + return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 test_mm512_cvtepi64_ps(__m512i __A) { + // COMMON-LABEL: test_mm512_cvtepi64_ps + // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CHECK-ASM: vcvtqq2ps + 
return _mm512_cvtepi64_ps(__A); +} + +__m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvtepi64_ps + // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtqq2ps + return _mm512_mask_cvtepi64_ps(__W, __U, __A); +} + +__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvtepi64_ps + // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") 
+ // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtqq2ps + return _mm512_maskz_cvtepi64_ps(__U, __A); +} + +__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) { + // COMMON-LABEL: test_mm512_cvt_roundepi64_ps + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64 + // CHECK-ASM: vcvtqq2ps + return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_ps + // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtqq2ps + return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 
test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_ps + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtqq2ps + return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m512d test_mm512_cvtepu64_pd(__m512i __A) { + // COMMON-LABEL: test_mm512_cvtepu64_pd + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double> + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CHECK-ASM: vcvtuqq2pd + return _mm512_cvtepu64_pd(__A); +} + +__m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: 
test_mm512_mask_cvtepu64_pd + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double> + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtuqq2pd + return _mm512_mask_cvtepu64_pd(__W, __U, __A); +} + +__m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvtepu64_pd + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double> + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtuqq2pd + return _mm512_maskz_cvtepu64_pd(__U, __A); +} + +__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) { + // COMMON-LABEL: test_mm512_cvt_roundepu64_pd + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 + // CHECK-ASM: vcvtuqq2pd + return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvt_roundepu64_pd + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtuqq2pd + return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_pd + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + // CHECK-ASM: vcvtuqq2pd + return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
+} + +__m256 test_mm512_cvtepu64_ps(__m512i __A) { + // COMMON-LABEL: test_mm512_cvtepu64_ps + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CHECK-ASM: vcvtuqq2ps + return _mm512_cvtepu64_ps(__A); +} + +__m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_mask_cvtepu64_ps + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_mask_cvtepu64_ps(__W, __U, 
__A); +} + +__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvtepu64_ps + // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float> + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_maskz_cvtepu64_ps(__U, __A); +} + +__m256 test_mm512_cvt_roundepu64_ps(__m512i __A) { + // COMMON-LABEL: test_mm512_cvt_roundepu64_ps + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64 + // CHECK-ASM: vcvtuqq2ps + return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) { + // 
COMMON-LABEL: test_mm512_mask_cvt_roundepu64_ps + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + +__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) { + // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_ps + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict") + // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64 + // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // CHECK-ASM: vcvtuqq2ps + return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} + diff --git a/clang/test/CodeGen/X86/avx512f-builtins-constrained.c b/clang/test/CodeGen/X86/avx512f-builtins-constrained.c --- 
a/clang/test/CodeGen/X86/avx512f-builtins-constrained.c +++ b/clang/test/CodeGen/X86/avx512f-builtins-constrained.c @@ -1,10 +1,17 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s -// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s -// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s +// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s +// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON 
--check-prefix=COMMONIR --check-prefix=CONSTRAINED %s // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s -// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s + +#ifdef STRICT +// Test that the constrained intrinsics are picking up the exception +// metadata from the AST instead of the global default from the command line. 
+ +#pragma float_control(except, on) +#endif #include diff --git a/clang/test/CodeGen/X86/fma-builtins-constrained.c b/clang/test/CodeGen/X86/fma-builtins-constrained.c --- a/clang/test/CodeGen/X86/fma-builtins-constrained.c +++ b/clang/test/CodeGen/X86/fma-builtins-constrained.c @@ -1,8 +1,15 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=strict -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=maytrap -DSTRICT=1 -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=strict -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM + +#ifdef STRICT +// Test that the constrained intrinsics are picking up the exception +// metadata from the AST instead of the global default from the command line. 
+ +#pragma float_control(except, on) +#endif #include diff --git a/clang/test/CodeGen/X86/sse-builtins-constrained.c b/clang/test/CodeGen/X86/sse-builtins-constrained.c --- a/clang/test/CodeGen/X86/sse-builtins-constrained.c +++ b/clang/test/CodeGen/X86/sse-builtins-constrained.c @@ -1,8 +1,15 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON + +#ifdef STRICT +// Test that the constrained intrinsics are picking up the exception +// metadata from the AST instead of the global default from the command line. + +#pragma float_control(except, on) +#endif #include