[X86] Emit 'freeze poison' for the __builtin_ia32_undef* builtins

Clang now lowers __builtin_ia32_undef128/256/512 to a frozen poison value
instead of a zero constant. To support that in the backend:
  * SDValue gains isFreezeUndef(), true for a FREEZE node whose operand is
    undef.
  * DAGCombiner folds a bitcast of a single-use frozen undef into a frozen
    undef of the destination type.
  * The X86 gather lowering treats a single-use frozen undef pass-through
    source like a plain undef and replaces it with a zero vector.
The Clang and llc tests are updated to match the new IR.

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -12491,12 +12491,9 @@
   case X86::BI__builtin_ia32_undef128:
   case X86::BI__builtin_ia32_undef256:
   case X86::BI__builtin_ia32_undef512:
-    // The x86 definition of "undef" is not the same as the LLVM definition
-    // (PR32176). We leave optimizing away an unnecessary zero constant to the
-    // IR optimizer and backend.
-    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
-    // value, we should use that here instead of a zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+    // The x86 definition of "undef" is equivalent to "freeze poison" in LLVM
+    // (PR32176).
+    return Builder.CreateFreeze(PoisonValue::get(ConvertType(E->getType())));
   case X86::BI__builtin_ia32_vec_init_v8qi:
   case X86::BI__builtin_ia32_vec_init_v4hi:
   case X86::BI__builtin_ia32_vec_init_v2si:
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2063,19 +2063,24 @@
 
 __m256 test_mm256_undefined_ps() {
   // CHECK-LABEL: test_mm256_undefined_ps
-  // CHECK: ret <8 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <8 x float>
+  // CHECK: ret <8 x float> %[[FR_BC]]
   return _mm256_undefined_ps();
 }
 
 __m256d test_mm256_undefined_pd() {
   // CHECK-LABEL: test_mm256_undefined_pd
-  // CHECK: ret <4 x double> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: ret <4 x double> %[[FR]]
   return _mm256_undefined_pd();
 }
 
 __m256i test_mm256_undefined_si256() {
   // CHECK-LABEL: test_mm256_undefined_si256
-  // CHECK: ret <4 x i64> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+  // CHECK: ret <4 x i64> %[[FR_BC]]
   return _mm256_undefined_si256();
 }
 
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -455,7 +455,9 @@
 
 __m128i test_mm_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_epi64
-  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
   return _mm_i32gather_epi64(b, c, 2);
 }
 
@@ -467,7 +469,9 @@
 
 __m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
   return _mm256_i32gather_epi64(b, c, 2);
 }
 
@@ -479,10 +483,11 @@
 
 __m128d test_mm_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_pd
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
   // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
   // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
   // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-  // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i32gather_pd(b, c, 2);
 }
 
@@ -494,10 +499,11 @@
 
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_pd
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
   // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
   // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
   // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
-  // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+  // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i32gather_pd(b, c, 2);
 }
 
@@ -509,10 +515,12 @@
 
 __m128 test_mm_i32gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i32gather_ps(b, c, 2);
 }
 
@@ -524,10 +532,12 @@
 
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i32gather_ps
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <8 x float>
   // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float>
   // CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
   // CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
-  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
+  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %[[FR_BC]], i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
   return _mm256_i32gather_ps(b, c, 2);
 }
 
@@ -563,7 +573,9 @@
 
 __m128i test_mm_i64gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_epi64
-  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
   return _mm_i64gather_epi64(b, c, 2);
 }
 
@@ -575,7 +587,9 @@
 
 __m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
   return _mm256_i64gather_epi64(b, c, 2);
 }
 
@@ -587,10 +601,11 @@
 
 __m128d test_mm_i64gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_pd
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
   // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
   // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
   // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-  // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %[[FR]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i64gather_pd(b, c, 2);
 }
 
@@ -602,8 +617,9 @@
 
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
   // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
-  // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+  // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
 
@@ -615,10 +631,12 @@
 
 __m128 test_mm_i64gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i64gather_ps(b, c, 2);
 }
 
@@ -630,10 +648,12 @@
 
 __m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm256_i64gather_ps(b, c, 2);
 }
 
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3780,25 +3780,32 @@
 
 __m512 test_mm512_undefined() {
   // CHECK-LABEL: @test_mm512_undefined
-  // CHECK: ret <16 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+  // CHECK: ret <16 x float> %[[FR_BC]]
   return _mm512_undefined();
 }
 
 __m512 test_mm512_undefined_ps() {
   // CHECK-LABEL: @test_mm512_undefined_ps
-  // CHECK: ret <16 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+  // CHECK: ret <16 x float> %[[FR_BC]]
   return _mm512_undefined_ps();
 }
 
 __m512d test_mm512_undefined_pd() {
   // CHECK-LABEL: @test_mm512_undefined_pd
-  // CHECK: ret <8 x double> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: ret <8 x double> %[[FR]]
   return _mm512_undefined_pd();
 }
 
 __m512i test_mm512_undefined_epi32() {
   // CHECK-LABEL: @test_mm512_undefined_epi32
-  // CHECK: ret <8 x i64> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <8 x i64>
+  // CHECK: ret <8 x i64> %[[FR_BC]]
   return _mm512_undefined_epi32();
 }
 
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -786,7 +786,9 @@
 
 __m128 test_mm_undefined_ps() {
   // CHECK-LABEL: test_mm_undefined_ps
-  // CHECK: ret <4 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
+  // CHECK: ret <4 x float> %[[FR_BC]]
   return _mm_undefined_ps();
 }
 
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1630,13 +1630,16 @@
 
 __m128d test_mm_undefined_pd() {
   // CHECK-LABEL: test_mm_undefined_pd
-  // CHECK: ret <2 x double> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: ret <2 x double> %[[FR]]
   return _mm_undefined_pd();
 }
 
 __m128i test_mm_undefined_si128() {
   // CHECK-LABEL: test_mm_undefined_si128
-  // CHECK: ret <2 x i64> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: ret <2 x i64> %[[FR_BC]]
   return _mm_undefined_si128();
 }
 
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -207,6 +207,7 @@
   inline bool isTargetOpcode() const;
   inline bool isMachineOpcode() const;
   inline bool isUndef() const;
+  inline bool isFreezeUndef() const;
   inline unsigned getMachineOpcode() const;
   inline const DebugLoc &getDebugLoc() const;
   inline void dump() const;
@@ -1150,6 +1151,10 @@
   return Node->isUndef();
 }
 
+inline bool SDValue::isFreezeUndef() const {
+  return Node->getOpcode() == ISD::FREEZE && Node->getOperand(0).isUndef();
+}
+
 inline bool SDValue::use_empty() const {
   return !Node->hasAnyUseOfValue(ResNo);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12497,6 +12497,10 @@
   if (N0.isUndef())
     return DAG.getUNDEF(VT);
 
+  // bitcast (freeze undef) -> freeze undef
+  if (N0.isFreezeUndef() && N0.hasOneUse())
+    return DAG.getFreeze(DAG.getUNDEF(VT));
+
   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
   // Only do this before legalize types, unless both types are integer and the
   // scalar type is legal. Only do this before legalize ops, since the target
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26011,10 +26011,11 @@
                                         TLI.getPointerTy(DAG.getDataLayout()));
   EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
-  // If source is undef or we know it won't be used, use a zero vector
-  // to break register dependency.
+  // If source is undef, frozen undef with one use only, or we
+  // know it won't be used, use a zero vector to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
-  if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
+  if (Src.isUndef() || (Src.isFreezeUndef() && Src.hasOneUse()) ||
+      ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
 
   // Cast mask to an integer type.
@@ -26052,10 +26053,11 @@
   Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
 
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
-  // If source is undef or we know it won't be used, use a zero vector
-  // to break register dependency.
+  // If source is undef, frozen undef with one use only, or we
+  // know it won't be used, use a zero vector to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
-  if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
+  if (Src.isUndef() || (Src.isFreezeUndef() && Src.hasOneUse()) ||
+      ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
 
   MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -2965,32 +2965,56 @@
 }
 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
 
-define <2 x double> @test_mm_undefined_pd() nounwind {
-; CHECK-LABEL: test_mm_undefined_pd:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    ret{{[l|q]}}
-  ret <2 x double> undef
-}
-
 define <4 x double> @test_mm256_undefined_pd() nounwind {
 ; CHECK-LABEL: test_mm256_undefined_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}}
-  ret <4 x double> undef
+  %v = freeze <4 x double> poison
+  ret <4 x double> %v
 }
 
 define <8 x float> @test_mm256_undefined_ps() nounwind {
 ; CHECK-LABEL: test_mm256_undefined_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}}
-  ret <8 x float> undef
+  %v = freeze <4 x double> poison
+  %w = bitcast <4 x double> %v to <8 x float>
+  ret <8 x float> %w
 }
 
 define <4 x i64> @test_mm256_undefined_si256() nounwind {
 ; CHECK-LABEL: test_mm256_undefined_si256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}}
-  ret <4 x i64> undef
+  %v = freeze <4 x double> poison
+  %w = bitcast <4 x double> %v to <4 x i64>
+  ret <4 x i64> %w
+}
+
+define <16 x float> @test_mm512_undefined() nounwind {
+; CHECK-LABEL: test_mm512_undefined:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}}
+  %v = freeze <8 x double> poison
+  %w = bitcast <8 x double> %v to <16 x float>
+  ret <16 x float> %w
+}
+
+define <8 x double> @test_mm512_undefined_pd() nounwind {
+; CHECK-LABEL: test_mm512_undefined_pd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}}
+  %v = freeze <8 x double> poison
+  ret <8 x double> %v
+}
+
+define <8 x i64> @test_mm512_undefined_epi32() nounwind {
+; CHECK-LABEL: test_mm512_undefined_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}}
+  %v = freeze <8 x double> poison
+  %w = bitcast <8 x double> %v to <8 x i64>
+  ret <8 x i64> %w
 }
 
 define <4 x double> @test_mm256_unpackhi_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -3513,7 +3513,9 @@
 ; CHECK-LABEL: test_mm_undefined_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  ret <4 x float> undef
+  %v = freeze <2 x double> poison
+  %w = bitcast <2 x double> %v to <4 x float>
+  ret <4 x float> %w
 }
 
 define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -6390,14 +6390,17 @@
 ; CHECK-LABEL: test_mm_undefined_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  ret <2 x double> undef
+  %v = freeze <2 x double> poison
+  ret <2 x double> %v
 }
 
 define <2 x i64> @test_mm_undefined_si128() {
 ; CHECK-LABEL: test_mm_undefined_si128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  ret <2 x i64> undef
+  %v = freeze <2 x double> poison
+  %w = bitcast <2 x double> %v to <2 x i64>
+  ret <2 x i64> %w
 }
 
 define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {