diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -12491,12 +12491,9 @@ case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: - // The x86 definition of "undef" is not the same as the LLVM definition - // (PR32176). We leave optimizing away an unnecessary zero constant to the - // IR optimizer and backend. - // TODO: If we had a "freeze" IR instruction to generate a fixed undef - // value, we should use that here instead of a zero. - return llvm::Constant::getNullValue(ConvertType(E->getType())); + // The x86 definition of "undef" is equivalent to "freeze poison" in LLVM + // (PR32176). + return Builder.CreateFreeze(PoisonValue::get(ConvertType(E->getType()))); case X86::BI__builtin_ia32_vec_init_v8qi: case X86::BI__builtin_ia32_vec_init_v4hi: case X86::BI__builtin_ia32_vec_init_v2si: diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -2063,19 +2063,24 @@ __m256 test_mm256_undefined_ps() { // CHECK-LABEL: test_mm256_undefined_ps - // CHECK: ret <8 x float> zeroinitializer + // CHECK: freeze <4 x double> poison + // CHECK: bitcast <4 x double> %{{.*}} to <8 x float> + // CHECK: ret <8 x float> %{{.*}} return _mm256_undefined_ps(); } __m256d test_mm256_undefined_pd() { // CHECK-LABEL: test_mm256_undefined_pd - // CHECK: ret <4 x double> zeroinitializer + // CHECK: freeze <4 x double> poison + // CHECK: ret <4 x double> %{{.*}} return _mm256_undefined_pd(); } __m256i test_mm256_undefined_si256() { // CHECK-LABEL: test_mm256_undefined_si256 - // CHECK: ret <4 x i64> zeroinitializer + // CHECK: freeze <4 x double> poison + // CHECK: bitcast <4 x double> %{{.*}} to <4 x i64> + // CHECK: ret <4 x i64> %{{.*}} return _mm256_undefined_si256(); } diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -455,7 +455,9 @@ __m128i test_mm_i32gather_epi64(long long const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64> + // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2) return _mm_i32gather_epi64(b, c, 2); } @@ -467,7 +469,9 @@ __m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) { // CHECK-LABEL: test_mm256_i32gather_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // CHECK: %[[FR:.*]] = freeze <4 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64> + // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) return _mm256_i32gather_epi64(b, c, 2); } @@ -479,10 +483,11 @@ __m128d test_mm_i32gather_pd(double const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_pd + // CHECK: %[[FR:.*]] = freeze <2 x double> poison // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double> // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> - // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2) + // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2) return _mm_i32gather_pd(b, c, 2); } @@ -494,10 +499,11 @@ __m256d test_mm256_i32gather_pd(double const *b, __m128i c) { // CHECK-LABEL: test_mm256_i32gather_pd + // CHECK: %[[FR:.*]] = freeze <4 x double> poison // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double> - // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2) + // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2) return _mm256_i32gather_pd(b, c, 2); } @@ -509,10 +515,12 @@ __m128 test_mm_i32gather_ps(float const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_ps + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float> // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> - // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2) + // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2) return _mm_i32gather_ps(b, c, 2); } @@ -524,10 +532,12 @@ __m256 test_mm256_i32gather_ps(float const *b, __m256i c) { // CHECK-LABEL: test_mm256_i32gather_ps + // CHECK: %[[FR:.*]] = freeze <4 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <8 x float> // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float> - // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2) + // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %[[FR_BC]], i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2) return _mm256_i32gather_ps(b, c, 2); } @@ -563,7 +573,9 @@ __m128i test_mm_i64gather_epi64(long long const *b, __m128i c) { // CHECK-LABEL: test_mm_i64gather_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64> + // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) return _mm_i64gather_epi64(b, c, 2); } @@ -575,7 +587,9 @@ __m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) { // CHECK-LABEL: test_mm256_i64gather_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // CHECK: %[[FR:.*]] = freeze <4 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64> + // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) return _mm256_i64gather_epi64(b, c, 2); } @@ -587,10 +601,11 @@ __m128d test_mm_i64gather_pd(double const *b, __m128i c) { // CHECK-LABEL: test_mm_i64gather_pd + // CHECK: %[[FR:.*]] = freeze <2 x double> poison // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double> // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> - // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2) + // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %[[FR]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2) return _mm_i64gather_pd(b, c, 2); } @@ -602,8 +617,9 @@ __m256d test_mm256_i64gather_pd(double const *b, __m256i c) { // CHECK-LABEL: test_mm256_i64gather_pd + // CHECK: %[[FR:.*]] = freeze <4 x double> poison // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}} - // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2) + // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2) return _mm256_i64gather_pd(b, c, 2); } @@ -615,10 +631,12 @@ __m128 test_mm_i64gather_ps(float const *b, __m128i c) { // CHECK-LABEL: test_mm_i64gather_ps + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float> // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> - // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2) + // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2) return _mm_i64gather_ps(b, c, 2); } @@ -630,10 +648,12 @@ __m128 test_mm256_i64gather_ps(float const *b, __m256i c) { // CHECK-LABEL: test_mm256_i64gather_ps + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float> // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> - // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2) + // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2) return _mm256_i64gather_ps(b, c, 2); } diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3780,25 +3780,32 @@ __m512 test_mm512_undefined() { // CHECK-LABEL: @test_mm512_undefined - // CHECK: ret <16 x float> zeroinitializer + // CHECK: %[[FR:.*]] = freeze <8 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float> + // CHECK: ret <16 x float> %[[FR_BC]] return _mm512_undefined(); } __m512 test_mm512_undefined_ps() { // CHECK-LABEL: @test_mm512_undefined_ps - // CHECK: ret <16 x float> zeroinitializer + // CHECK: %[[FR:.*]] = freeze <8 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float> + // CHECK: ret <16 x float> %[[FR_BC]] return _mm512_undefined_ps(); } __m512d test_mm512_undefined_pd() { // CHECK-LABEL: @test_mm512_undefined_pd - // CHECK: ret <8 x double> zeroinitializer + // CHECK: %[[FR:.*]] = freeze <8 x double> poison + // CHECK: ret <8 x double> %[[FR]] return _mm512_undefined_pd(); } __m512i test_mm512_undefined_epi32() { // CHECK-LABEL: @test_mm512_undefined_epi32 - // CHECK: ret <8 x i64> zeroinitializer + // CHECK: %[[FR:.*]] = freeze <8 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <8 x i64> + // CHECK: ret <8 x i64> %[[FR_BC]] return _mm512_undefined_epi32(); } diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -786,7 +786,9 @@ __m128 test_mm_undefined_ps() { // CHECK-LABEL: test_mm_undefined_ps - // CHECK: ret <4 x float> zeroinitializer + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float> + // CHECK: ret <4 x float> %[[FR_BC]] return _mm_undefined_ps(); } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1630,13 +1630,16 @@ __m128d test_mm_undefined_pd() { // CHECK-LABEL: test_mm_undefined_pd - // CHECK: ret <2 x double> zeroinitializer + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: ret <2 x double> %[[FR]] return _mm_undefined_pd(); } __m128i test_mm_undefined_si128() { // CHECK-LABEL: test_mm_undefined_si128 - // CHECK: ret <2 x i64> zeroinitializer + // CHECK: %[[FR:.*]] = freeze <2 x double> poison + // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64> + // CHECK: ret <2 x i64> %[[FR_BC]] return _mm_undefined_si128(); }