Index: lib/IR/AutoUpgrade.cpp
===================================================================
--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -78,6 +78,7 @@
       Name=="ssse3.pabs.d.128" || // Added in 6.0
       Name.startswith("avx2.pabs.") || // Added in 6.0
       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
+      Name.startswith("avx512.broadcastm") || // Added in 6.0
       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
@@ -1027,7 +1028,17 @@
       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                                CI->getArgOperand(0), CI->getArgOperand(1));
       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
-    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
+    } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
+      // An i8 operand (broadcastmb) is zero-extended to i64 elements; anything
+      // else (broadcastmw takes i16) to i32. The scalar is then splatted.
+      Type *ExtTy = Type::getInt32Ty(C);
+      if (CI->getArgOperand(0)->getType()->isIntegerTy(8))
+        ExtTy = Type::getInt64Ty(C);
+      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
+                         ExtTy->getPrimitiveSizeInBits();
+      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
+      Rep = Builder.CreateVectorSplat(NumElts, Rep);
+    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
       unsigned NumElts =
           CI->getArgOperand(1)->getType()->getVectorNumElements();
       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -422,12 +422,6 @@
   X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
   X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
-  X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
-  X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
-  X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
-  X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
-  X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
   X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
   X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
   X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
Index: test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll
===================================================================
--- test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll
+++ test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll
@@ -45,3 +45,26 @@
   %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
   ret <8 x i64> %res
 }
+
+define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
+; CHECK-LABEL: test_x86_vbroadcastmw_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    vpbroadcastd %eax, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0)
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
+
+define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
+; CHECK-LABEL: test_x86_broadcastmb_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    vpbroadcastq %rax, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0)
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
+
Index: test/CodeGen/X86/avx512cd-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512cd-intrinsics.ll
+++ test/CodeGen/X86/avx512cd-intrinsics.ll
@@ -1,28 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s
 
-define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
-; CHECK-LABEL: test_x86_vbroadcastmw_512:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    vpbroadcastmw2d %k0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0)
-  ret <16 x i32> %res
-}
-declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
-
-define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
-; CHECK-LABEL: test_x86_broadcastmb_512:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    vpbroadcastmb2q %k0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0)
-  ret <8 x i64> %res
-}
-declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
-
 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
 
 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
Index: test/CodeGen/X86/avx512cdvl-intrinsics-upgrade.ll
===================================================================
--- test/CodeGen/X86/avx512cdvl-intrinsics-upgrade.ll
+++ test/CodeGen/X86/avx512cdvl-intrinsics-upgrade.ll
@@ -69,3 +69,47 @@
   ret <4 x i64> %res2
 }
 
+define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
+; CHECK-LABEL: test_x86_vbroadcastmw_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    vpbroadcastd %eax, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
+
+define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
+; CHECK-LABEL: test_x86_vbroadcastmw_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    vpbroadcastd %eax, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
+
+define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
+; CHECK-LABEL: test_x86_broadcastmb_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    vpbroadcastq %rax, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
+
+define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
+; CHECK-LABEL: test_x86_broadcastmb_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    vpbroadcastq %rax, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
+
Index: test/CodeGen/X86/avx512cdvl-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512cdvl-intrinsics.ll
+++ test/CodeGen/X86/avx512cdvl-intrinsics.ll
@@ -147,46 +147,3 @@
   ret <4 x i64> %res2
 }
 
-define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
-; CHECK-LABEL: test_x86_vbroadcastmw_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    vpbroadcastmw2d %k0, %ymm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
-  ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
-
-define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
-; CHECK-LABEL: test_x86_vbroadcastmw_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    vpbroadcastmw2d %k0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
-
-define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
-; CHECK-LABEL: test_x86_broadcastmb_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    vpbroadcastmb2q %k0, %ymm0
-; CHECK-NEXT:    retq
-  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
-
-define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
-; CHECK-LABEL: test_x86_broadcastmb_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    vpbroadcastmb2q %k0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)