Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -384,6 +384,10 @@
 TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ceilps_128_mask, "V4fV4fV4fIi", "", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ceilpd_128_mask, "V2dV2dV2dIi", "", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_floorps_128_mask, "V4fV4fV4fIi", "", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_floorpd_128_mask, "V2dV2dV2dIi", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "", "sse4.1")
@@ -496,6 +500,10 @@
 TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_floorpd_256_mask, "V4dV4dV4dIi", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_floorps_256_mask, "V8fV8fV8fIi", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_ceilpd_256_mask, "V4dV4dV4dIi", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_ceilps_256_mask, "V8fV8fV8fIi", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "", "avx")
@@ -940,6 +948,10 @@
 TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_floorps_mask, "V16fV16fV16fUs", "", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_floorpd_mask, "V8dV8dV8dUc", "", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_ceilps_mask, "V16fV16fV16fUs", "", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_ceilpd_mask, "V8dV8dV8dUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "", "avx512f")
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -8269,6 +8269,15 @@
   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
 }
 
+static Value *EmitX86FloorCeil(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+                               Intrinsic::ID ID) {
+  assert((ID == Intrinsic::ceil || ID == Intrinsic::floor) &&
+         "Unexpected intrinsic ID");
+  Value *F = CGF.CGM.getIntrinsic(ID, Ops[0]->getType());
+  Value *Res = CGF.Builder.CreateCall(F, {Ops[0]});
+  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
+}
+
 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
   StringRef CPUStr = cast<StringLiteral>(CPUExpr)->getString();
@@ -8899,6 +8908,21 @@
                          Ops[1]);
   }
 
+  case X86::BI__builtin_ia32_floorps_128_mask:
+  case X86::BI__builtin_ia32_floorpd_128_mask:
+  case X86::BI__builtin_ia32_floorps_256_mask:
+  case X86::BI__builtin_ia32_floorpd_256_mask:
+  case X86::BI__builtin_ia32_floorps_mask:
+  case X86::BI__builtin_ia32_floorpd_mask:
+    return EmitX86FloorCeil(*this, Ops, Intrinsic::floor);
+  case X86::BI__builtin_ia32_ceilps_128_mask:
+  case X86::BI__builtin_ia32_ceilpd_128_mask:
+  case X86::BI__builtin_ia32_ceilps_256_mask:
+  case X86::BI__builtin_ia32_ceilpd_256_mask:
+  case X86::BI__builtin_ia32_ceilps_mask:
+  case X86::BI__builtin_ia32_ceilpd_mask:
+    return EmitX86FloorCeil(*this, Ops, Intrinsic::ceil);
+
   case X86::BI__builtin_ia32_pabsb128:
   case X86::BI__builtin_ia32_pabsw128:
   case X86::BI__builtin_ia32_pabsd128:
Index: lib/Headers/avx512fintrin.h
===================================================================
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -1886,73 +1886,49 @@
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_floor_ps(__m512 __A)
 {
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-                                                  _MM_FROUND_FLOOR,
-                                                  (__v16sf) __A, -1,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_floorps_mask((__v16sf)__A, (__v16sf)__A, -1);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-                                                  _MM_FROUND_FLOOR,
-                                                  (__v16sf) __W, __U,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_floorps_mask((__v16sf)__A, (__v16sf)__W, __U);
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_floor_pd(__m512d __A)
 {
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-                                                   _MM_FROUND_FLOOR,
-                                                   (__v8df) __A, -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_floorpd_mask((__v8df)__A, (__v8df)__A, -1);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-                                                   _MM_FROUND_FLOOR,
-                                                   (__v8df) __W, __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_floorpd_mask((__v8df)__A, (__v8df)__W, __U);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-                                                  _MM_FROUND_CEIL,
-                                                  (__v16sf) __W, __U,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_ceilps_mask((__v16sf)__A, (__v16sf)__W, __U);
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_ceil_ps(__m512 __A)
 {
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-                                                  _MM_FROUND_CEIL,
-                                                  (__v16sf) __A, -1,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512)__builtin_ia32_ceilps_mask((__v16sf)__A, (__v16sf)__A, -1);
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_ceil_pd(__m512d __A)
 {
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-                                                   _MM_FROUND_CEIL,
-                                                   (__v8df) __A, -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_ceilpd_mask((__v8df)__A, (__v8df)__A, -1);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-                                                   _MM_FROUND_CEIL,
-                                                   (__v8df) __W, __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_ceilpd_mask((__v8df)__A, (__v8df)__W, __U);
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
Index: lib/Headers/avxintrin.h
===================================================================
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -458,7 +458,11 @@
 /// \param V
 ///    A 256-bit vector of [4 x double].
 /// \returns A 256-bit vector of [4 x double] containing the rounded up values.
-#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)
+#define _mm256_ceil_pd(V) \
+  __extension__({ \
+    (__m256d) __builtin_ia32_ceilpd_256_mask((__v4df)(__m256d)(V), \
+                                             (__v4df)(__m256d)(V), -1); \
+  })
 
 /// \brief Rounds down the values stored in a 256-bit vector of [4 x double].
 ///    The source values are rounded down to integer values and returned as
@@ -476,7 +480,11 @@
 ///    A 256-bit vector of [4 x double].
 /// \returns A 256-bit vector of [4 x double] containing the rounded down
 ///    values.
-#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
+#define _mm256_floor_pd(V) \
+  __extension__({ \
+    (__m256d) __builtin_ia32_floorpd_256_mask((__v4df)(__m256d)(V), \
+                                              (__v4df)(__m256d)(V), -1); \
+  })
 
 /// \brief Rounds up the values stored in a 256-bit vector of [8 x float]. The
 ///    source values are rounded up to integer values and returned as
@@ -493,7 +501,11 @@
 /// \param V
 ///    A 256-bit vector of [8 x float].
 /// \returns A 256-bit vector of [8 x float] containing the rounded up values.
-#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)
+#define _mm256_ceil_ps(V) \
+  __extension__({ \
+    (__m256) __builtin_ia32_ceilps_256_mask((__v8sf)(__m256)(V), \
+                                            (__v8sf)(__m256)(V), -1); \
+  })
 
 /// \brief Rounds down the values stored in a 256-bit vector of [8 x float]. The
 ///    source values are rounded down to integer values and returned as
@@ -510,7 +522,11 @@
 /// \param V
 ///    A 256-bit vector of [8 x float].
 /// \returns A 256-bit vector of [8 x float] containing the rounded down values.
-#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
+#define _mm256_floor_ps(V) \
+  __extension__({ \
+    (__m256) __builtin_ia32_floorps_256_mask((__v8sf)(__m256)(V), \
+                                             (__v8sf)(__m256)(V), -1); \
+  })
 
 /* Logical */
 /// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double].
Index: lib/Headers/smmintrin.h
===================================================================
--- lib/Headers/smmintrin.h
+++ lib/Headers/smmintrin.h
@@ -61,7 +61,11 @@
 /// \param X
 ///    A 128-bit vector of [4 x float] values to be rounded up.
 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
-#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)
+#define _mm_ceil_ps(X) \
+  __extension__({ \
+    (__m128) __builtin_ia32_ceilps_128_mask((__v4sf)(__m128)(X), \
+                                            (__v4sf)(__m128)(X), -1); \
+  })
 
 /// \brief Rounds up each element of the 128-bit vector of [2 x double] to an
 ///    integer and returns the rounded values in a 128-bit vector of
@@ -78,7 +82,11 @@
 /// \param X
 ///    A 128-bit vector of [2 x double] values to be rounded up.
 /// \returns A 128-bit vector of [2 x double] containing the rounded values.
-#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)
+#define _mm_ceil_pd(X) \
+  __extension__({ \
+    (__m128d) __builtin_ia32_ceilpd_128_mask((__v2df)(__m128d)(X), \
+                                             (__v2df)(__m128d)(X), -1); \
+  })
 
 /// \brief Copies three upper elements of the first 128-bit vector operand to
 ///    the corresponding three upper elements of the 128-bit result vector of
@@ -103,7 +111,11 @@
 ///    of the result.
 /// \returns A 128-bit vector of [4 x float] containing the copied and rounded
 ///    values.
-#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(X, Y) \
+  __extension__({ \
+    (__m128) __builtin_ia32_ceilps_128_mask((__v4sf)(__m128)(Y), \
+                                            (__v4sf)(__m128)(X), 1); \
+  })
 
 /// \brief Copies the upper element of the first 128-bit vector operand to the
 ///    corresponding upper element of the 128-bit result vector of [2 x double].
@@ -128,7 +140,11 @@
 ///    of the result.
 /// \returns A 128-bit vector of [2 x double] containing the copied and rounded
 ///    values.
-#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(X, Y) \
+  __extension__({ \
+    (__m128d) __builtin_ia32_ceilpd_128_mask((__v2df)(__m128d)(Y), \
+                                             (__v2df)(__m128d)(X), 1); \
+  })
 
 /// \brief Rounds down each element of the 128-bit vector of [4 x float] to
 ///    an integer and returns the rounded values in a 128-bit vector of
@@ -145,7 +161,11 @@
 /// \param X
 ///    A 128-bit vector of [4 x float] values to be rounded down.
 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
-#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)
+#define _mm_floor_ps(X) \
+  __extension__({ \
+    (__m128) __builtin_ia32_floorps_128_mask((__v4sf)(__m128)(X), \
+                                             (__v4sf)(__m128)(X), -1); \
+  })
 
 /// \brief Rounds down each element of the 128-bit vector of [2 x double] to an
 ///    integer and returns the rounded values in a 128-bit vector of
@@ -162,7 +182,11 @@
 /// \param X
 ///    A 128-bit vector of [2 x double].
 /// \returns A 128-bit vector of [2 x double] containing the rounded values.
-#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)
+#define _mm_floor_pd(X) \
+  __extension__({ \
+    (__m128d) __builtin_ia32_floorpd_128_mask((__v2df)(__m128d)(X), \
+                                              (__v2df)(__m128d)(X), -1); \
+  })
 
 /// \brief Copies three upper elements of the first 128-bit vector operand to
 ///    the corresponding three upper elements of the 128-bit result vector of
@@ -187,7 +211,11 @@
 ///    of the result.
 /// \returns A 128-bit vector of [4 x float] containing the copied and rounded
 ///    values.
-#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(X, Y) \
+  __extension__({ \
+    (__m128) __builtin_ia32_floorps_128_mask((__v4sf)(__m128)(Y), \
+                                             (__v4sf)(__m128)(X), 1); \
+  })
 
 /// \brief Copies the upper element of the first 128-bit vector operand to the
 ///    corresponding upper element of the 128-bit result vector of [2 x double].
@@ -212,7 +240,11 @@
 ///    of the result.
 /// \returns A 128-bit vector of [2 x double] containing the copied and rounded
 ///    values.
-#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(X, Y) \
+  __extension__({ \
+    (__m128d) __builtin_ia32_floorpd_128_mask((__v2df)(__m128d)(Y), \
+                                              (__v2df)(__m128d)(X), 1); \
+  })
 
 /// \brief Rounds each element of the 128-bit vector of [4 x float] to an
 ///    integer value according to the rounding control specified by the second
Index: test/CodeGen/avx-builtins.c
===================================================================
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -202,13 +202,15 @@
 __m256d test_mm256_ceil_pd(__m256d x) {
   // CHECK-LABEL: test_mm256_ceil_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v4f64
+  // CHECK-NOT: select
   return _mm256_ceil_pd(x);
 }
 
 __m256 test_mm_ceil_ps(__m256 x) {
   // CHECK-LABEL: test_mm_ceil_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v8f32
+  // CHECK-NOT: select
   return _mm256_ceil_ps(x);
 }
 
@@ -364,13 +366,15 @@
 __m256d test_mm256_floor_pd(__m256d x) {
   // CHECK-LABEL: test_mm256_floor_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v4f64
+  // CHECK-NOT: select
   return _mm256_floor_pd(x);
 }
 
 __m256 test_mm_floor_ps(__m256 x) {
   // CHECK-LABEL: test_mm_floor_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v8f32
+  // CHECK-NOT: select
   return _mm256_floor_ps(x);
 }
Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -7485,31 +7485,67 @@
   return _mm512_min_round_ps(__A,__B,_MM_FROUND_CUR_DIRECTION);
 }
 
+__m512 test_mm512_floor_ps(__m512 __A)
+{
+  // CHECK-LABEL: @test_mm512_floor_ps
+  // CHECK: @llvm.floor.v16f32
+  // CHECK-NOT: select
+  return _mm512_floor_ps(__A);
+}
+
+__m512d test_mm512_floor_pd(__m512d __A)
+{
+  // CHECK-LABEL: @test_mm512_floor_pd
+  // CHECK: @llvm.floor.v8f64
+  // CHECK-NOT: select
+  return _mm512_floor_pd(__A);
+}
+
 __m512 test_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_floor_ps
-  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512
+  // CHECK-LABEL: @test_mm512_mask_floor_ps
+  // CHECK: @llvm.floor.v16f32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_floor_ps (__W,__U,__A);
 }
 
 __m512d test_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_floor_pd
-  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512
+  // CHECK-LABEL: @test_mm512_mask_floor_pd
+  // CHECK: @llvm.floor.v8f64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_floor_pd (__W,__U,__A);
 }
 
+__m512 test_mm512_ceil_ps(__m512 __A)
+{
+  // CHECK-LABEL: @test_mm512_ceil_ps
+  // CHECK: @llvm.ceil.v16f32
+  // CHECK-NOT: select
+  return _mm512_ceil_ps(__A);
+}
+
+__m512d test_mm512_ceil_pd(__m512d __A)
+{
+  // CHECK-LABEL: @test_mm512_ceil_pd
+  // CHECK: @llvm.ceil.v8f64
+  // CHECK-NOT: select
+  return _mm512_ceil_pd(__A);
+}
+
 __m512 test_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_ceil_ps
-  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512
+  // CHECK-LABEL: @test_mm512_mask_ceil_ps
+  // CHECK: @llvm.ceil.v16f32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_ceil_ps (__W,__U,__A);
 }
 
 __m512d test_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_ceil_pd
-  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512
+  // CHECK-LABEL: @test_mm512_mask_ceil_pd
+  // CHECK: @llvm.ceil.v8f64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_ceil_pd (__W,__U,__A);
 }
Index: test/CodeGen/sse41-builtins.c
===================================================================
--- test/CodeGen/sse41-builtins.c
+++ test/CodeGen/sse41-builtins.c
@@ -44,25 +44,29 @@
 __m128d test_mm_ceil_pd(__m128d x) {
   // CHECK-LABEL: test_mm_ceil_pd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v2f64
+  // CHECK-NOT: select
   return _mm_ceil_pd(x);
 }
 
 __m128 test_mm_ceil_ps(__m128 x) {
   // CHECK-LABEL: test_mm_ceil_ps
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v4f32
+  // CHECK-NOT: select
   return _mm_ceil_ps(x);
 }
 
 __m128d test_mm_ceil_sd(__m128d x, __m128d y) {
   // CHECK-LABEL: test_mm_ceil_sd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v2f64
+  // CHECK: select
   return _mm_ceil_sd(x, y);
 }
 
 __m128 test_mm_ceil_ss(__m128 x, __m128 y) {
   // CHECK-LABEL: test_mm_ceil_ss
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v4f32
+  // CHECK: select
   return _mm_ceil_ss(x, y);
 }
 
@@ -196,25 +200,29 @@
 __m128d test_mm_floor_pd(__m128d x) {
   // CHECK-LABEL: test_mm_floor_pd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v2f64
+  // CHECK-NOT: select
   return _mm_floor_pd(x);
 }
 
 __m128 test_mm_floor_ps(__m128 x) {
   // CHECK-LABEL: test_mm_floor_ps
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v4f32
+  // CHECK-NOT: select
   return _mm_floor_ps(x);
 }
 
 __m128d test_mm_floor_sd(__m128d x, __m128d y) {
   // CHECK-LABEL: test_mm_floor_sd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v2f64
+  // CHECK: select
   return _mm_floor_sd(x, y);
 }
 
 __m128 test_mm_floor_ss(__m128 x, __m128 y) {
   // CHECK-LABEL: test_mm_floor_ss
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v4f32
+  // CHECK: select
   return _mm_floor_ss(x, y);
 }
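
For reviewers who want to try the new lowering locally, here is a minimal usage sketch. It is not part of the patch, and the file name example.c is just a placeholder; it exercises the rewritten SSE4.1 macros, which after this change compile to the generic @llvm.ceil.v4f32 / @llvm.floor.v4f32 intrinsics instead of @llvm.x86.sse41.round.ps.

/* Minimal sketch (not part of the patch): exercises _mm_ceil_ps and
 * _mm_floor_ps as rewritten above.
 * Build: clang -msse4.1 -O1 example.c && ./a.out */
#include <smmintrin.h>
#include <stdio.h>

int main(void) {
  /* _mm_set_ps lists elements high-to-low, so v = {2.0, 0.5, -1.2, 3.7}. */
  __m128 v = _mm_set_ps(3.7f, -1.2f, 0.5f, 2.0f);
  __m128 up = _mm_ceil_ps(v);    /* per-element round toward +infinity */
  __m128 down = _mm_floor_ps(v); /* per-element round toward -infinity */

  float u[4], d[4];
  _mm_storeu_ps(u, up);
  _mm_storeu_ps(d, down);

  printf("ceil:  %g %g %g %g\n", u[0], u[1], u[2], u[3]); /* 2 1 -1 4 */
  printf("floor: %g %g %g %g\n", d[0], d[1], d[2], d[3]); /* 2 0 -2 3 */
  return 0;
}

Inspecting the IR with -emit-llvm -S should show the @llvm.ceil/@llvm.floor calls and, because the unmasked macros pass an all-ones mask, no select instructions.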