Use a nondeterministic second operand in the x86 widening cast intrinsics.

Each _mm256_cast*/_mm512_cast* widening helper used to shuffle the source
vector with itself and pass index -1 for the upper lanes.  The second
shuffle operand now comes from __builtin_nondeterministic_value() and every
mask index is explicit.  The IR tests are updated to expect a
`freeze ... poison` value feeding the shufflevector, and the new
avx-cast-builtins.c test pins the -O3 assembly of every cast to a single
vmovaps followed by ret.

NOTE(review): this patch text was recovered from a whitespace-collapsed
copy.  Indentation inside context lines is reconstructed, and several IR
CHECK lines look like they lost a trailing mask operand -- confirm against
the tree before applying (whitespace-insensitive matching may be needed).

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -397,14 +397,15 @@
 static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_castpd256_pd512(__m256d __a)
 {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_castps256_ps512(__m256 __a)
 {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
-                                 -1, -1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
+                                 0, 1, 2, 3, 4, 5, 6, 7,
+                                 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
 static __inline __m128d __DEFAULT_FN_ATTRS512
@@ -446,7 +447,7 @@
 static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_castpd128_pd512 (__m128d __A)
 {
-  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 2, 3, 2, 3);
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS512
@@ -464,19 +465,19 @@
 static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_castps128_ps512 (__m128 __A)
 {
-  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_castsi128_si512 (__m128i __A)
 {
-  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 2, 3, 2, 3);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_castsi256_si512 (__m256i __A)
 {
-  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
+  return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -192,22 +192,25 @@
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_castph128_ph256(__m128h __a) {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
-                                 -1, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+                                 14, 15);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
 _mm512_castph128_ph512(__m128h __a) {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
-                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                                 -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+                                 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9,
+                                 10, 11, 12, 13, 14, 15);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
 _mm512_castph256_ph512(__m256h __a) {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-                                 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
-                                 -1, -1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                                 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+                                 23, 24, 25, 26, 27, 28, 29, 30, 31);
 }
 
 /// Constructs a 256-bit floating-point vector of [16 x half] from a
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4499,7 +4499,7 @@
 static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_castpd128_pd256(__m128d __a)
 {
-  return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);
+  return __builtin_shufflevector((__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
 }
 
 /// Constructs a 256-bit floating-point vector of [8 x float] from a
@@ -4520,7 +4520,7 @@
 static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_castps128_ps256(__m128 __a)
 {
-  return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);
+  return __builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
@@ -4539,7 +4539,7 @@
 static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_castsi128_si256(__m128i __a)
 {
-  return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);
+  return __builtin_shufflevector((__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
 }
 
 /// Constructs a 256-bit floating-point vector of [4 x double] from a
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -143,7 +143,8 @@
 
 __m256d test_mm256_castpd128_pd256(__m128d A) {
   // CHECK-LABEL: test_mm256_castpd128_pd256
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32>
+  // CHECK: [[A:%.*]] = freeze <2 x double> poison
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> [[A]], <4 x i32>
   return _mm256_castpd128_pd256(A);
 }
 
@@ -165,7 +166,8 @@
 
 __m256 test_mm256_castps128_ps256(__m128 A) {
   // CHECK-LABEL: test_mm256_castps128_ps256
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32>
+  // CHECK: [[A:%.*]] = freeze <4 x float> poison
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> [[A]], <8 x i32>
   return _mm256_castps128_ps256(A);
 }
 
@@ -177,7 +179,8 @@
 
 __m256i test_mm256_castsi128_si256(__m128i A) {
   // CHECK-LABEL: test_mm256_castsi128_si256
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32>
+  // CHECK: [[A:%.*]] = freeze <2 x i64> poison
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> [[A]], <4 x i32>
   return _mm256_castsi128_si256(A);
 }
 
diff --git a/clang/test/CodeGen/X86/avx-cast-builtins.c b/clang/test/CodeGen/X86/avx-cast-builtins.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx-cast-builtins.c
@@ -0,0 +1,100 @@
+// RUN: %clang_cc1 %s -O3 -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512fp16 -S -o - | FileCheck %s
+
+
+#include <immintrin.h>
+
+__m256d test_mm256_castpd128_pd256(__m128d A) {
+  // CHECK-LABEL: test_mm256_castpd128_pd256
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm256_castpd128_pd256(A);
+}
+
+__m256 test_mm256_castps128_ps256(__m128 A) {
+  // CHECK-LABEL: test_mm256_castps128_ps256
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm256_castps128_ps256(A);
+}
+
+__m256i test_mm256_castsi128_si256(__m128i A) {
+  // CHECK-LABEL: test_mm256_castsi128_si256
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm256_castsi128_si256(A);
+}
+
+__m256h test_mm256_castph128_ph256(__m128h A) {
+  // CHECK-LABEL: test_mm256_castph128_ph256
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm256_castph128_ph256(A);
+}
+
+__m512h test_mm512_castph128_ph512(__m128h A) {
+  // CHECK-LABEL: test_mm512_castph128_ph512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castph128_ph512(A);
+}
+
+__m512h test_mm512_castph256_ph512(__m256h A) {
+  // CHECK-LABEL: test_mm512_castph256_ph512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %ymm0, %ymm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castph256_ph512(A);
+}
+
+__m512d test_mm512_castpd256_pd512(__m256d A){
+  // CHECK-LABEL: test_mm512_castpd256_pd512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %ymm0, %ymm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castpd256_pd512(A);
+}
+
+__m512 test_mm512_castps256_ps512(__m256 A){
+  // CHECK-LABEL: test_mm512_castps256_ps512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %ymm0, %ymm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castps256_ps512(A);
+}
+
+__m512d test_mm512_castpd128_pd512(__m128d A){
+  // CHECK-LABEL: test_mm512_castpd128_pd512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castpd128_pd512(A);
+}
+
+__m512 test_mm512_castps128_ps512(__m128 A){
+  // CHECK-LABEL: test_mm512_castps128_ps512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castps128_ps512(A);
+}
+
+__m512i test_mm512_castsi128_si512(__m128i A){
+  // CHECK-LABEL: test_mm512_castsi128_si512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %xmm0, %xmm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castsi128_si512(A);
+}
+
+__m512i test_mm512_castsi256_si512(__m256i A){
+  // CHECK-LABEL: test_mm512_castsi256_si512
+  // CHECK: # %bb.0:
+  // CHECK-NEXT: vmovaps %ymm0, %ymm0
+  // CHECK-NEXT: ret{{[l|q]}}
+  return _mm512_castsi256_si512(A);
+}
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -8987,13 +8987,15 @@
 
 __m512 test_mm512_castps128_ps512(__m128 __A) {
   // CHECK-LABEL: @test_mm512_castps128_ps512
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32>
+  // CHECK: [[A:%.*]] = freeze <4 x float> poison
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> [[A]], <16 x i32>
   return _mm512_castps128_ps512(__A);
 }
 
 __m512d test_mm512_castpd128_pd512(__m128d __A) {
   // CHECK-LABEL: @test_mm512_castpd128_pd512
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32>
+  // CHECK: [[A:%.*]] = freeze <2 x double> poison
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> [[A]], <8 x i32>
   return _mm512_castpd128_pd512(__A);
 }
 
@@ -9086,7 +9088,8 @@
 __m512d test_mm512_castpd256_pd512(__m256d a)
 {
   // CHECK-LABEL: @test_mm512_castpd256_pd512
-  // CHECK: shufflevector <4 x double> {{.*}}
+  // CHECK: [[A:%.*]] = freeze <4 x double> poison
+  // CHECK: shufflevector <4 x double> {{.*}}, <4 x double> [[A]],
   return _mm512_castpd256_pd512(a);
 }
 
@@ -9112,13 +9115,15 @@
 }
 __m512i test_mm512_castsi128_si512(__m128i __A) {
   // CHECK-LABEL: @test_mm512_castsi128_si512
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32>
+  // CHECK: [[A:%.*]] = freeze <2 x i64> poison
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> [[A]], <8 x i32>
   return _mm512_castsi128_si512(__A);
 }
 
 __m512i test_mm512_castsi256_si512(__m256i __A) {
   // CHECK-LABEL: @test_mm512_castsi256_si512
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32>
+  // CHECK: [[A:%.*]] = freeze <4 x i64> poison
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> [[A]], <8 x i32>
   return _mm512_castsi256_si512(__A);
 }
 
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -326,19 +326,22 @@
 
 __m256h test_mm256_castph128_ph256(__m128h __a) {
   // CHECK-LABEL: test_mm256_castph128_ph256
-  // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> %{{.*}}, <16 x i32>
+  // CHECK: [[A:%.*]] = freeze <8 x half> poison
+  // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> [[A]], <16 x i32>
   return _mm256_castph128_ph256(__a);
 }
 
 __m512h test_mm512_castph128_ph512(__m128h __a) {
   // CHECK-LABEL: test_mm512_castph128_ph512
-  // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> %{{.*}}, <32 x i32>
+  // CHECK: [[A:%.*]] = freeze <8 x half> poison
+  // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> [[A]], <32 x i32>
   return _mm512_castph128_ph512(__a);
 }
 
 __m512h test_mm512_castph256_ph512(__m256h __a) {
   // CHECK-LABEL: test_mm512_castph256_ph512
-  // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> %{{.*}}, <32 x i32>
+  // CHECK: [[A:%.*]] = freeze <16 x half> poison
+  // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> [[A]], <32 x i32>
   return _mm512_castph256_ph512(__a);
 }
 