Index: clang/lib/Headers/emmintrin.h =================================================================== --- clang/lib/Headers/emmintrin.h +++ clang/lib/Headers/emmintrin.h @@ -10,6 +10,10 @@ #ifndef __EMMINTRIN_H #define __EMMINTRIN_H +/* Turn off reassociation for intrinsics, push state to restore at end. */ +#pragma float_control(push) +#pragma clang fp reassociate(off) + #include typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); @@ -4978,4 +4982,6 @@ #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) +#pragma float_control(pop) + #endif /* __EMMINTRIN_H */ Index: clang/lib/Headers/immintrin.h =================================================================== --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -10,6 +10,10 @@ #ifndef __IMMINTRIN_H #define __IMMINTRIN_H +/* Turn off reassociation for intrinsics, push state to restore at end. */ +#pragma float_control(push) +#pragma clang fp reassociate(off) + #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ @@ -597,4 +601,6 @@ #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ +#pragma float_control(pop) + #endif /* __IMMINTRIN_H */ Index: clang/lib/Headers/x86intrin.h =================================================================== --- clang/lib/Headers/x86intrin.h +++ clang/lib/Headers/x86intrin.h @@ -10,6 +10,10 @@ #ifndef __X86INTRIN_H #define __X86INTRIN_H +/* Turn off reassociation for intrinsics, push state to restore at end. 
*/ +#pragma float_control(push) +#pragma clang fp reassociate(off) + #include #include @@ -59,5 +63,6 @@ #include #endif +#pragma float_control(pop) #endif /* __X86INTRIN_H */ Index: clang/lib/Headers/xmmintrin.h =================================================================== --- clang/lib/Headers/xmmintrin.h +++ clang/lib/Headers/xmmintrin.h @@ -10,6 +10,10 @@ #ifndef __XMMINTRIN_H #define __XMMINTRIN_H +/* Turn off reassociation for intrinsics, push state to restore at end. */ +#pragma float_control(push) +#pragma clang fp reassociate(off) + #include typedef int __v4si __attribute__((__vector_size__(16))); @@ -3005,4 +3009,6 @@ #include #endif +#pragma float_control(pop) + #endif /* __XMMINTRIN_H */ Index: clang/test/Headers/emmintrin.c =================================================================== --- /dev/null +++ clang/test/Headers/emmintrin.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=CKFMATH %s +// +#include <emmintrin.h> + +// Make sure that the llvm IR for _mm_add_pd doesn't have fast attribute and +// doesn't have reassoc set. +// CKFMATH: define{{.*}} <2 x double> @test_emmintrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn +__m128d test_emmintrin_no_reassoc(__m128d __a, __m128d __b) { + return _mm_add_pd(__a, __b); +} + +// Make sure that all fast flags were restored outside of the include file. 
+// CKFMATH: define{{.*}} double @test_fast +// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double +double test_fast(double __a, double __b) { + return __a + __b; +} Index: clang/test/Headers/immintrin.c =================================================================== --- /dev/null +++ clang/test/Headers/immintrin.c @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple i386-unknown-unknown -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=CKFMATH %s +// +// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-unknown-unknown -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=CKFMATH %s + +// Include the metaheader that includes all Intel intrinsic headers. +#include <immintrin.h> + +// Make sure that the llvm IR for _mm_add_ps doesn't have fast attribute and +// doesn't have reassoc set. +// CKFMATH: define{{.*}} <4 x float> @test_xmmintrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <4 x float> +__m128 __attribute__((__target__("sse"))) test_xmmintrin_no_reassoc(__m128 __a, __m128 __b) { + return _mm_add_ps(__a, __b); +} + +// Make sure that the llvm IR for _mm_add_pd doesn't have fast attribute and +// doesn't have reassoc set. +// CKFMATH: define{{.*}} <2 x double> @test_emmintrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <2 x double> +__m128d __attribute__((__target__("sse2"))) test_emmintrin_no_reassoc(__m128d __a, __m128d __b) { + return _mm_add_pd(__a, __b); +} + +// Make sure that the llvm IR for _mm256_add_ps doesn't have fast attribute and +// doesn't have reassoc set. +// This intrinsic comes from avxintrin.h, and so is checking that +// changes in immintrin.h affect the files it includes as well. 
+// CKFMATH: define{{.*}} <8 x float> @test_mm256intrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <8 x float> +__m256 __attribute__((__target__(("avx")))) test_mm256intrin_no_reassoc(__m256 __a, __m256 __b) { + return _mm256_add_ps(__a, __b); +} + +// Make sure that the llvm IR for _mm512_add_ps doesn't have fast attribute and +// doesn't have reassoc set. +// This intrinsic comes from avx512fintrin.h +// CKFMATH: define{{.*}} <16 x float> @test_mm512intrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <16 x float> +__m512 __attribute__((__target__(("avx512f")))) test_mm512intrin_no_reassoc(__m512 __a, __m512 __b) { + return _mm512_add_ps(__a, __b); +} + +// Make sure that all fast flags were restored outside of the include file. +// CKFMATH: define{{.*}} double @test_fast +// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double +double test_fast(double __a, double __b) { + return __a + __b; +} Index: clang/test/Headers/x86intrin-3.c =================================================================== --- /dev/null +++ clang/test/Headers/x86intrin-3.c @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple i386-unknown-unknown -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=CKFMATH %s +// +// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-unknown-unknown -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=CKFMATH %s + +// Include the metaheader that includes all x86 intrinsic headers. +#include <x86intrin.h> + +// Make sure that the llvm IR for _mm_add_ps doesn't have fast attribute and +// doesn't have reassoc set. +// CKFMATH: define{{.*}} <4 x float> @test_xmmintrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <4 x float> +__m128 __attribute__((__target__("sse"))) test_xmmintrin_no_reassoc(__m128 __a, __m128 __b) { + return _mm_add_ps(__a, __b); +} + +// Make sure that the llvm IR for _mm_add_pd doesn't have fast attribute and +// doesn't have reassoc set. 
+// CKFMATH: define{{.*}} <2 x double> @test_emmintrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <2 x double> +__m128d __attribute__((__target__("sse2"))) test_emmintrin_no_reassoc(__m128d __a, __m128d __b) { + return _mm_add_pd(__a, __b); +} + +// Make sure that the llvm IR for _mm256_add_ps doesn't have fast attribute and +// doesn't have reassoc set. +// This intrinsic comes from avxintrin.h, and so is checking that +// changes in immintrin.h affect the files it includes as well. +// CKFMATH: define{{.*}} <8 x float> @test_mm256intrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <8 x float> +__m256 __attribute__((__target__(("avx")))) test_mm256intrin_no_reassoc(__m256 __a, __m256 __b) { + return _mm256_add_ps(__a, __b); +} + +// Make sure that the llvm IR for _mm512_add_ps doesn't have fast attribute and +// doesn't have reassoc set. +// This intrinsic comes from avx512fintrin.h +// CKFMATH: define{{.*}} <16 x float> @test_mm512intrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <16 x float> +__m512 __attribute__((__target__(("avx512f")))) test_mm512intrin_no_reassoc(__m512 __a, __m512 __b) { + return _mm512_add_ps(__a, __b); +} + +// Make sure that all fast flags were restored outside of the include file. 
+// CKFMATH: define{{.*}} double @test_fast +// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double +double test_fast(double __a, double __b) { + return __a + __b; +} Index: clang/test/Headers/xmmintrin.c =================================================================== --- clang/test/Headers/xmmintrin.c +++ clang/test/Headers/xmmintrin.c @@ -1,5 +1,8 @@ // RUN: %clang_cc1 %s -ffreestanding -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - | FileCheck %s // +// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=CKFMATH %s +// // RUN: rm -rf %t // RUN: %clang_cc1 %s -ffreestanding -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - \ // RUN: -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -isystem %S/Inputs/include \ @@ -26,6 +29,21 @@ return _mm_add_sd(__a, __b); } +// Make sure that the llvm IR for _mm_add_ps doesn't have fast attribute and +// doesn't have reassoc set. +// CKFMATH: define{{.*}} <4 x float> @test_xmmintrin_no_reassoc +// CKFMATH: fadd nnan ninf nsz arcp afn <4 x float> +__m128 test_xmmintrin_no_reassoc(__m128 __a, __m128 __b) { + return _mm_add_ps(__a, __b); +} + +// Make sure that all fast flags were restored outside of the include file. +// CKFMATH: define{{.*}} double @test_fast +// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double +double test_fast(double __a, double __b) { + return __a + __b; +} + #if __STDC_HOSTED__ // Make sure stdlib.h symbols are accessible. void *p = NULL;