Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -1668,20 +1668,6 @@ [IntrReadMem, IntrArgMemOnly]>; } -// Conditional move ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_mask_move_ss : - GCCBuiltin<"__builtin_ia32_movss_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_move_sd : - GCCBuiltin<"__builtin_ia32_movsd_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; -} - // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, Index: llvm/trunk/lib/IR/AutoUpgrade.cpp =================================================================== --- llvm/trunk/lib/IR/AutoUpgrade.cpp +++ llvm/trunk/lib/IR/AutoUpgrade.cpp @@ -365,6 +365,7 @@ Name.startswith("avx2.pblendd.") || Name.startswith("avx.vbroadcastf128") || Name == "avx2.vbroadcasti128" || + Name.startswith("avx512.mask.move.s") || Name == "xop.vpcmov" || (Name.startswith("xop.vpcom") && F->arg_size() == 2))) { NewFn = nullptr; @@ -679,6 +680,20 @@ std::max(NumElts, 8U))); } +static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { + Value* A = CI.getArgOperand(0); + Value* B = CI.getArgOperand(1); + Value* Src = CI.getArgOperand(2); + Value* Mask = CI.getArgOperand(3); + + Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1)); + Value* Cmp = Builder.CreateIsNotNull(AndNode); + Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0); + Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0); + Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2); + return Builder.CreateInsertElement(A, Select, (uint64_t)0); 
+} + + // Replace a masked intrinsic with an older unmasked intrinsic. static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, Intrinsic::ID IID) { @@ -1341,6 +1356,8 @@ Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); + } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { + Rep = upgradeMaskedMove(Builder, *CI); } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) { VectorType *VecTy = cast<VectorType>(CI->getType()); Intrinsic::ID IID; Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -794,10 +794,6 @@ X86ISD::FMIN_RND, 0), X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK, - X86ISD::MOVSD, 0), - X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK, - X86ISD::MOVSS, 0), X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -1765,6 +1765,54 @@ ret <8 x i64> %res2 } +define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; CHECK-LABEL: test_mm_mask_move_ss: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq +entry: + %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U) + ret <4 x float> %res +} + + 
+define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; CHECK-LABEL: test_mm_maskz_move_ss: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: retq +entry: + %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U) + ret <4 x float> %res +} + +define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; CHECK-LABEL: test_mm_mask_move_sd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq +entry: + %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U) + ret <2 x double> %res +} + +define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; CHECK-LABEL: test_mm_maskz_move_sd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: retq +entry: + %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U) + ret <2 x double> %res +} + +declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8) +declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8) + declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16) define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {