diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -397,14 +397,15 @@ static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a) { - return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); + return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, + 1, 2, 3, 4, 5, 6, 7); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a) { - return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, - -1, -1, -1, -1, -1, -1, -1, -1); + return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline __m128d __DEFAULT_FN_ATTRS512 @@ -446,7 +447,10 @@ static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512 (__m128d __A) { - return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); + __m256d __B = __builtin_nondeterministic_value(__B); + return __builtin_shufflevector( + __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3), + __B, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline __m512d __DEFAULT_FN_ATTRS512 @@ -464,19 +468,25 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512 (__m128 __A) { - return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + __m256 __B = __builtin_nondeterministic_value(__B); + return __builtin_shufflevector( + __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7), + __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512 (__m128i __A) { - return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); + __m256i __B = __builtin_nondeterministic_value(__B); + return __builtin_shufflevector( + __builtin_shufflevector(__A, 
__builtin_nondeterministic_value(__A), 0, 1, 2, 3), + __B, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512 (__m256i __A) { - return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); + return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7); } static __inline __m512 __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -192,22 +192,26 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castph128_ph256(__m128h __a) { - return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, - -1, -1, -1, -1, -1); + return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castph128_ph512(__m128h __a) { - return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1); + __m256h __b = __builtin_nondeterministic_value(__b); + return __builtin_shufflevector( + __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + __b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castph256_ph512(__m256h __a) { - return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1); + return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31); } /// Constructs a 256-bit 
floating-point vector of [16 x half] from a diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -4499,7 +4499,8 @@ static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256(__m128d __a) { - return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1); + return __builtin_shufflevector( + (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] from a @@ -4520,7 +4521,9 @@ static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256(__m128 __a) { - return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1); + return __builtin_shufflevector((__v4sf)__a, + (__v4sf)__builtin_nondeterministic_value(__a), + 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 256-bit integer vector from a 128-bit integer vector. @@ -4539,7 +4542,8 @@ static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256(__m128i __a) { - return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1); + return __builtin_shufflevector( + (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [4 x double] from a diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -143,7 +143,8 @@ __m256d test_mm256_castpd128_pd256(__m128d A) { // CHECK-LABEL: test_mm256_castpd128_pd256 - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> + // CHECK: [[A:%.*]] = freeze <2 x double> poison + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> [[A]], <4 x i32> return _mm256_castpd128_pd256(A); } @@ -165,7 +166,8 @@ __m256 test_mm256_castps128_ps256(__m128 A) { // CHECK-LABEL: test_mm256_castps128_ps256 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> 
%{{.*}}, <8 x i32> + // CHECK: [[A:%.*]] = freeze <4 x float> poison + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> [[A]], <8 x i32> return _mm256_castps128_ps256(A); } @@ -177,7 +179,8 @@ __m256i test_mm256_castsi128_si256(__m128i A) { // CHECK-LABEL: test_mm256_castsi128_si256 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> + // CHECK: [[A:%.*]] = freeze <2 x i64> poison + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> [[A]], <4 x i32> return _mm256_castsi128_si256(A); } diff --git a/clang/test/CodeGen/X86/avx-cast-builtins.c b/clang/test/CodeGen/X86/avx-cast-builtins.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/X86/avx-cast-builtins.c @@ -0,0 +1,101 @@ +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -O3 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512fp16 -S -o - | FileCheck %s + + +#include <immintrin.h> + +__m256d test_mm256_castpd128_pd256(__m128d A) { + // CHECK-LABEL: test_mm256_castpd128_pd256 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm256_castpd128_pd256(A); +} + +__m256 test_mm256_castps128_ps256(__m128 A) { + // CHECK-LABEL: test_mm256_castps128_ps256 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm256_castps128_ps256(A); +} + +__m256i test_mm256_castsi128_si256(__m128i A) { + // CHECK-LABEL: test_mm256_castsi128_si256 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm256_castsi128_si256(A); +} + +__m256h test_mm256_castph128_ph256(__m128h A) { + // CHECK-LABEL: test_mm256_castph128_ph256 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm256_castph128_ph256(A); +} + +__m512h test_mm512_castph128_ph512(__m128h A) { 
+ // CHECK-LABEL: test_mm512_castph128_ph512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castph128_ph512(A); +} + +__m512h test_mm512_castph256_ph512(__m256h A) { + // CHECK-LABEL: test_mm512_castph256_ph512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castph256_ph512(A); +} + +__m512d test_mm512_castpd256_pd512(__m256d A){ + // CHECK-LABEL: test_mm512_castpd256_pd512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castpd256_pd512(A); +} + +__m512 test_mm512_castps256_ps512(__m256 A){ + // CHECK-LABEL: test_mm512_castps256_ps512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castps256_ps512(A); +} + +__m512d test_mm512_castpd128_pd512(__m128d A){ + // CHECK-LABEL: test_mm512_castpd128_pd512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castpd128_pd512(A); +} + +__m512 test_mm512_castps128_ps512(__m128 A){ + // CHECK-LABEL: test_mm512_castps128_ps512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castps128_ps512(A); +} + +__m512i test_mm512_castsi128_si512(__m128i A){ + // CHECK-LABEL: test_mm512_castsi128_si512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castsi128_si512(A); +} + +__m512i test_mm512_castsi256_si512(__m256i A){ + // CHECK-LABEL: test_mm512_castsi256_si512 + // CHECK: # %bb.0: + // CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 + // CHECK-NEXT: ret{{[l|q]}} + return _mm512_castsi256_si512(A); +} diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c --- 
a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8987,13 +8987,23 @@ __m512 test_mm512_castps128_ps512(__m128 __A) { // CHECK-LABEL: @test_mm512_castps128_ps512 - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + // CHECK: [[B:%.*]] = freeze <8 x float> poison + // CHECK: store <8 x float> [[B]], ptr [[BA:%.*]] + // CHECK: [[A:%.*]] = freeze <4 x float> poison + // CHECK: [[SV:%.*]] = shufflevector <4 x float> %{{.*}}, <4 x float> [[A]], <8 x i32> + // CHECK: [[C:%.*]] = load <8 x float>, ptr [[BA]] + // CHECK: shufflevector <8 x float> [[SV]], <8 x float> [[C]], <16 x i32> return _mm512_castps128_ps512(__A); } __m512d test_mm512_castpd128_pd512(__m128d __A) { // CHECK-LABEL: @test_mm512_castpd128_pd512 - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + // CHECK: [[B:%.*]] = freeze <4 x double> poison + // CHECK: store <4 x double> [[B]], ptr [[BA:%.*]] + // CHECK: [[A:%.*]] = freeze <2 x double> poison + // CHECK: [[SV:%.*]] = shufflevector <2 x double> %{{.*}}, <2 x double> [[A]], <4 x i32> + // CHECK: [[C:%.*]] = load <4 x double>, ptr [[BA]] + // CHECK: shufflevector <4 x double> [[SV]], <4 x double> [[C]], <8 x i32> return _mm512_castpd128_pd512(__A); } @@ -9086,7 +9096,8 @@ __m512d test_mm512_castpd256_pd512(__m256d a) { // CHECK-LABEL: @test_mm512_castpd256_pd512 - // CHECK: shufflevector <4 x double> {{.*}} + // CHECK: [[A:%.*]] = freeze <4 x double> poison + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> [[A]], <8 x i32> return _mm512_castpd256_pd512(a); } @@ -9112,13 +9123,19 @@ } __m512i test_mm512_castsi128_si512(__m128i __A) { // CHECK-LABEL: @test_mm512_castsi128_si512 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> + // CHECK: [[B:%.*]] = freeze <4 x i64> poison + // CHECK: store <4 x i64> [[B]], ptr [[BA:%.*]] + // CHECK: [[A:%.*]] = freeze <2 x i64> poison + // CHECK: [[SV:%.*]] = shufflevector <2 x i64> 
%{{.*}}, <2 x i64> [[A]], <4 x i32> + // CHECK: [[C:%.*]] = load <4 x i64>, ptr [[BA]] + // CHECK: shufflevector <4 x i64> [[SV]], <4 x i64> [[C]], <8 x i32> return _mm512_castsi128_si512(__A); } __m512i test_mm512_castsi256_si512(__m256i __A) { // CHECK-LABEL: @test_mm512_castsi256_si512 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + // CHECK: [[A:%.*]] = freeze <4 x i64> poison + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> [[A]], <8 x i32> return _mm512_castsi256_si512(__A); } diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c --- a/clang/test/CodeGen/X86/avx512fp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c @@ -325,19 +325,26 @@ __m256h test_mm256_castph128_ph256(__m128h __a) { // CHECK-LABEL: test_mm256_castph128_ph256 - // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> %{{.*}}, <16 x i32> + // CHECK: [[A:%.*]] = freeze <8 x half> poison + // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> [[A]], <16 x i32> return _mm256_castph128_ph256(__a); } __m512h test_mm512_castph128_ph512(__m128h __a) { // CHECK-LABEL: test_mm512_castph128_ph512 - // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> %{{.*}}, <32 x i32> + // CHECK: [[B:%.*]] = freeze <16 x half> poison + // CHECK: store <16 x half> [[B]], ptr [[BA:%.*]] + // CHECK: [[A:%.*]] = freeze <8 x half> poison + // CHECK: [[SV:%.*]] = shufflevector <8 x half> %{{.*}}, <8 x half> [[A]], <16 x i32> + // CHECK: [[C:%.*]] = load <16 x half>, ptr [[BA]] + // CHECK: shufflevector <16 x half> [[SV]], <16 x half> [[C]], <32 x i32> return _mm512_castph128_ph512(__a); } __m512h test_mm512_castph256_ph512(__m256h __a) { // CHECK-LABEL: test_mm512_castph256_ph512 - // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> %{{.*}}, <32 x i32> + // CHECK: [[A:%.*]] = freeze <16 x half> poison + // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> [[A]], <32 x i32> return _mm512_castph256_ph512(__a); }