Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -19648,6 +19648,15 @@ Src2, Src1); return DAG.getBitcast(VT, Res); } + case MASK_BINOP: { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); + + SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); + SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); + SDValue Res = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Src2); + return DAG.getBitcast(VT, Res); + } case FIXUPIMMS: case FIXUPIMMS_MASKZ: case FIXUPIMM: @@ -19820,6 +19829,33 @@ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + case Intrinsic::x86_avx512_knot_w: { + SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1)); + SDValue RHS = DAG.getConstant(1, dl, MVT::v16i1); + SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS); + return DAG.getBitcast(MVT::i16, Res); + } + + case Intrinsic::x86_avx512_kandn_w: { + SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1)); + // Invert LHS for the not. + LHS = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, + DAG.getConstant(1, dl, MVT::v16i1)); + SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2)); + SDValue Res = DAG.getNode(ISD::AND, dl, MVT::v16i1, LHS, RHS); + return DAG.getBitcast(MVT::i16, Res); + } + + case Intrinsic::x86_avx512_kxnor_w: { + SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1)); + SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2)); + SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS); + // Invert result for the not. + Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, Res, + DAG.getConstant(1, dl, MVT::v16i1)); + return DAG.getBitcast(MVT::i16, Res); + } + case Intrinsic::x86_sse42_pcmpistria128: case Intrinsic::x86_sse42_pcmpestria128: case Intrinsic::x86_sse42_pcmpistric128: Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -2381,15 +2381,6 @@ defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>; -multiclass avx512_mask_unop_int { - let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_avx512_"##IntName##"_w") - (i16 GR16:$src)), - (COPY_TO_REGCLASS (!cast(InstName##"Wrr") - (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>; -} -defm : avx512_mask_unop_int<"knot", "KNOT">; - // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit let Predicates = [HasAVX512, NoDQI] in def : Pat<(vnot VK8:$src), @@ -2438,21 +2429,6 @@ defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>; defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; -multiclass avx512_mask_binop_int { - let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_avx512_"##IntName##"_w") - (i16 GR16:$src1), (i16 GR16:$src2)), - (COPY_TO_REGCLASS (!cast(InstName##"Wrr") - (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), - (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; -} - -defm : avx512_mask_binop_int<"kand", "KAND">; -defm : avx512_mask_binop_int<"kandn", "KANDN">; -defm : avx512_mask_binop_int<"kor", "KOR">; -defm : avx512_mask_binop_int<"kxnor", "KXNOR">; -defm : avx512_mask_binop_int<"kxor", "KXOR">; - multiclass avx512_binop_pat { // With AVX512F, 8-bit mask is promoted to 16-bit mask, Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -36,7 +36,7 @@ TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, - FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK, GATHER_AVX2 + FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP, }; struct IntrinsicData { @@ -474,10 +474,12 @@ X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), + X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0), + X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0), X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0), X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0), - + X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD, Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -33,12 +33,12 @@ define i16 @test_kand(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kand: ; CHECK: ## BB#0: -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: kmovw %esi, %k0 ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: kmovw %eax, %k2 ; CHECK-NEXT: kandw %k0, %k1, %k0 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 +; CHECK-NEXT: kandw %k0, %k2, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) @@ -50,12 +50,12 @@ define i16 @test_kandn(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kandn: ; CHECK: ## BB#0: -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: kmovw %esi, %k0 ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: kmovw %eax, %k2 +; CHECK-NEXT: kandnw %k2, %k1, %k1 ; CHECK-NEXT: kandnw %k0, %k1, %k0 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: kandnw %k1, %k0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8) @@ -79,12 +79,12 @@ define i16 @test_kor(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kor: ; CHECK: ## BB#0: -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: kmovw %esi, %k0 ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: kmovw %eax, %k2 ; CHECK-NEXT: korw %k0, %k1, %k0 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 +; CHECK-NEXT: korw %k0, %k2, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8) @@ -110,12 +110,12 @@ define i16 @test_kxnor(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kxnor: ; CHECK: ## BB#0: -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: kmovw %esi, %k0 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: kxnorw %k0, %k1, %k0 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: kxnorw %k1, %k0, %k0 +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: kmovw %eax, %k2 +; CHECK-NEXT: kxorw %k0, %k1, %k0 +; CHECK-NEXT: kxorw %k0, %k2, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8) @@ -127,12 +127,12 @@ define i16 @test_kxor(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kxor: ; CHECK: ## BB#0: -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: kmovw %esi, %k0 ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: kmovw %eax, %k2 ; CHECK-NEXT: kxorw %k0, %k1, %k0 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: kxorw %k1, %k0, %k0 +; CHECK-NEXT: kxorw %k0, %k2, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)