Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -17637,6 +17637,21 @@ subVec, subVec, immVal), Mask, Passthru, Subtarget, DAG); } + case BRCST32x2_TO_VEC: { + SDValue Src = Op.getOperand(1); + SDValue PassThru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + + assert((VT.getScalarType() == MVT::i32 || + VT.getScalarType() == MVT::f32) && "Unexpected type!"); + //bitcast Src to packed 64 + MVT ScalarVT = VT.getScalarType() == MVT::i32 ? MVT::i64 : MVT::f64; + MVT BitcastVT = MVT::getVectorVT(ScalarVT, Src.getValueSizeInBits()/64); + Src = DAG.getBitcast(BitcastVT, Src); + + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), + Mask, PassThru, Subtarget, DAG); + } default: break; } Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -1070,45 +1070,29 @@ EVEX_V512, EVEX_CD8<32, CD8VT8>; } -multiclass avx512_broadcast_32x2 opc, string OpcodeStr, - X86VectorVTInfo _Dst, X86VectorVTInfo _Src, - SDNode OpNode = X86SubVBroadcast> { - - defm r : AVX512_maskable, - T8PD, EVEX; - let mayLoad = 1 in - defm m : AVX512_maskable, - T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>; -} - multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, - AVX512VLVectorVTInfo _> { + AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> { let Predicates = [HasDQI] in - defm Z : avx512_broadcast_32x2, + defm Z : avx512_broadcast_rm, EVEX_V512; let Predicates = [HasDQI, HasVLX] in - defm Z256 : avx512_broadcast_32x2, + defm Z256 : avx512_broadcast_rm, EVEX_V256; } multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, - AVX512VLVectorVTInfo _> : - avx512_common_broadcast_32x2 { + AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> : + avx512_common_broadcast_32x2 { let Predicates = [HasDQI, HasVLX] in - defm Z128 : avx512_broadcast_32x2, EVEX_V128; + defm Z128 : avx512_broadcast_rm, + EVEX_V128; } defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", - avx512vl_i32_info>; + avx512vl_i32_info, avx512vl_i64_info>; defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", - avx512vl_f32_info>; + avx512vl_f32_info, avx512vl_f64_info>; def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -31,7 +31,7 @@ FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3, VPERM_2OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, - COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, + COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, STOREANT, BLEND, INSERT_SUBVEC, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, @@ -513,10 +513,10 @@ X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_512, INTR_TYPE_1OP_MASK, X86ISD::VBROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_256, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_512, BRCST_SUBVEC_TO_VEC, @@ -529,12 +529,12 @@ X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf64x4_512, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_256, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_512, BRCST_SUBVEC_TO_VEC,