diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37487,7 +37487,9 @@
     // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
     if (Src.getOpcode() == ISD::BITCAST &&
         SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() &&
-        DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
+        DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
+        FixedVectorType::isValidElementType(
+            BCVT.getScalarType().getTypeForEVT(*DAG.getContext()))) {
       EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
                                    VT.getVectorNumElements());
       return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx,+mmx | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx,+mmx | FileCheck %s --check-prefix=X64
 
 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
 ; X32-LABEL: A:
@@ -972,6 +972,30 @@
   ret float %9
 }
 
+define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
+; X32-LABEL: broadcast_x86_mmx:
+; X32:       ## %bb.0: ## %bb
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    movq %mm0, (%esp)
+; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: broadcast_x86_mmx:
+; X64:       ## %bb.0: ## %bb
+; X64-NEXT:    movdq2q %xmm0, %mm0
+; X64-NEXT:    movq2dq %mm0, %xmm0
+; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT:    retq
+bb:
+  %tmp1 = bitcast x86_mmx %tmp to i64
+  %tmp2 = insertelement <2 x i64> undef, i64 %tmp1, i32 0
+  %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
+  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x i16> %tmp4
+}
+
 declare void @gfunc(<4 x float>*)
 declare void @llvm.lifetime.start.p0i8(i64, i8*)
 declare void @llvm.lifetime.end.p0i8(i64, i8*)
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2,+mmx | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2,+mmx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq,+mmx | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq,+mmx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
 
 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
 ; X32-LABEL: BB16:
@@ -1448,3 +1448,34 @@
   store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
   ret void
 }
+
+define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
+; X32-LABEL: broadcast_x86_mmx:
+; X32:       ## %bb.0: ## %bb
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    movq %mm0, (%esp)
+; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; X64-AVX2-LABEL: broadcast_x86_mmx:
+; X64-AVX2:       ## %bb.0: ## %bb
+; X64-AVX2-NEXT:    movdq2q %xmm0, %mm0
+; X64-AVX2-NEXT:    movq %mm0, %rax
+; X64-AVX2-NEXT:    vmovq %rax, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512VL-LABEL: broadcast_x86_mmx:
+; X64-AVX512VL:       ## %bb.0: ## %bb
+; X64-AVX512VL-NEXT:    movdq2q %xmm0, %mm0
+; X64-AVX512VL-NEXT:    movq %mm0, %rax
+; X64-AVX512VL-NEXT:    vpbroadcastq %rax, %xmm0
+; X64-AVX512VL-NEXT:    retq
+bb:
+  %tmp1 = bitcast x86_mmx %tmp to i64
+  %tmp2 = insertelement <2 x i64> undef, i64 %tmp1, i32 0
+  %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
+  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x i16> %tmp4
+}
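
Note (not part of the patch): the new guard matters because x86_mmx is not a valid
element type for a fixed-length IR vector, so the old code could ask
EVT::getVectorVT for a vector of x86_mmx elements when the bitcast source was an
x86_mmx value, producing a malformed type downstream. Below is a minimal
standalone sketch, assuming the LLVM C++ API of this era (Type::getX86_MMXTy
still exists), of the distinction the added FixedVectorType::isValidElementType
check relies on:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/Support/raw_ostream.h"

  int main() {
    llvm::LLVMContext Ctx;
    // Ordinary scalars such as i64 are valid fixed-vector element types.
    bool I64Ok = llvm::FixedVectorType::isValidElementType(
        llvm::Type::getInt64Ty(Ctx));
    // x86_mmx is not, which is exactly the case the new guard rejects
    // before building an X86ISD::VBROADCAST with that element type.
    bool MMXOk = llvm::FixedVectorType::isValidElementType(
        llvm::Type::getX86_MMXTy(Ctx));
    llvm::errs() << "i64: " << (I64Ok ? "valid" : "invalid")
                 << ", x86_mmx: " << (MMXOk ? "valid" : "invalid") << "\n";
    // Expected output: i64: valid, x86_mmx: invalid
    return 0;
  }

With the check in place, the combine simply bails out for the x86_mmx case and
the tests above fall back to the movdq2q/movq2dq and vmovddup/vpbroadcastq
sequences shown in the CHECK lines, instead of crashing.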