Index: ../lib/Target/X86/X86ISelLowering.cpp =================================================================== --- ../lib/Target/X86/X86ISelLowering.cpp +++ ../lib/Target/X86/X86ISelLowering.cpp @@ -16375,7 +16375,27 @@ Mask, PassThru, Subtarget, DAG); } case VPERM_3OP_MASKZ: - case VPERM_3OP_MASK: + case VPERM_3OP_MASK:{ + // Src2 is the PassThru + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + MVT VT = Op.getSimpleValueType(); + SDValue PassThru = SDValue(); + + // set PassThru element + if (IntrData->Type == VPERM_3OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + else + PassThru = Src2; + + // Swap Src1 and Src2 in the node creation + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, + dl, Op.getValueType(), + Src2, Src1, Src3), + Mask, PassThru, Subtarget, DAG); + } case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { @@ -16387,7 +16407,7 @@ SDValue PassThru = SDValue(); // set PassThru element - if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) + if (IntrData->Type == FMA_OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else if (IntrData->Type == FMA_OP_MASK3) PassThru = Src3; Index: ../lib/Target/X86/X86InstrAVX512.td =================================================================== --- ../lib/Target/X86/X86InstrAVX512.td +++ ../lib/Target/X86/X86InstrAVX512.td @@ -1137,7 +1137,7 @@ //===----------------------------------------------------------------------===// // -- VPERM2I - 3 source operands form -- -multiclass avx512_perm_3src opc, string OpcodeStr, +multiclass avx512_perm_i opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { let Constraints = "$src1 = $dst" in { defm rr: AVX512_maskable_3src opc, string OpcodeStr, +multiclass avx512_perm_i_mb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { let mayLoad = 1, Constraints = "$src1 = $dst" in defm rmb: AVX512_maskable_3src opc, string OpcodeStr, +multiclass avx512_perm_i_sizes opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { - let Predicates = [HasAVX512] in - defm NAME: avx512_perm_3src, - avx512_perm_3src_mb, EVEX_V512; + defm NAME: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V128; - defm NAME#256: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V256; + defm NAME#128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; } } -multiclass avx512_perm_3src_sizes_w opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + +multiclass avx512_perm_i_sizes_w opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in - defm NAME: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V512; + defm NAME: avx512_perm_i, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm NAME#128: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V128; - defm NAME#256: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V256; - } -} -defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3, - avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3, - avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3, - avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3, - avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3, - avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3, - avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3, - avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3, - avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; + defm NAME#128: avx512_perm_i, EVEX_V128; + defm NAME#256: avx512_perm_i, EVEX_V256; + } +} + +defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", X86VPermi2X, + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", X86VPermi2X, + avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w", X86VPermi2X, + avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", X86VPermi2X, + avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", X86VPermi2X, + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +// VPERMT +multiclass avx512_perm_t opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _, + X86VectorVTInfo MaskVT> { +let Constraints = "$src1 = $dst" in { + defm rr: AVX512_maskable_3src, EVEX_4V, + AVX5128IBase; + + let mayLoad = 1 in + defm rm: AVX512_maskable_3src, + EVEX_4V, AVX5128IBase; + } +} +multiclass avx512_perm_t_mb opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _, + X86VectorVTInfo MaskVT> { + let mayLoad = 1, Constraints = "$src1 = $dst" in + defm rmb: AVX512_maskable_3src, + AVX5128IBase, EVEX_4V, EVEX_B; +} + +multiclass avx512_perm_t_sizes opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_t, + avx512_perm_t_mb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#128: avx512_perm_t, + avx512_perm_t_mb, EVEX_V128; + defm NAME#256: avx512_perm_t, + avx512_perm_t_mb, EVEX_V256; + } +} + +multiclass avx512_perm_t_sizes_w opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + let Predicates = [HasBWI] in + defm NAME: avx512_perm_t, EVEX_V512; + let Predicates = [HasBWI, HasVLX] in { + defm NAME#128: avx512_perm_t, EVEX_V128; + defm NAME#256: avx512_perm_t, EVEX_V256; + } +} + +defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", X86VPermt2Int, + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", X86VPermt2Int, + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2W : avx512_perm_t_sizes_w<0x7D, "vpermt2w", X86VPermt2Int, + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", X86VPermt2Fp, + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", X86VPermt2Fp, + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask Index: ../lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- ../lib/Target/X86/X86InstrFragmentsSIMD.td +++ ../lib/Target/X86/X86InstrFragmentsSIMD.td @@ -363,8 +363,17 @@ def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>; def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; -def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>; -def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; +def X86VPermt2Fp : SDNode<"X86ISD::VPERMV3", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, + SDTCisSameAs<0,1>, SDTCisInt<2>, + SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>, + SDTCisSameAs<0,3>]>, []>; +def X86VPermt2Int : SDNode<"X86ISD::VPERMV3", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, + SDTCisSameAs<0,3>]>, []>; + +def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;