Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -1931,45 +1931,47 @@ } } -multiclass blendmask_dq opc, string OpcodeStr, - X86FoldableSchedWrite sched128, - X86FoldableSchedWrite sched256, +multiclass blendmask_dq opc, string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { - defm Z : WriteFVarBlendask , - WriteFVarBlendask_rmb , EVEX_V512; + defm Z : WriteFVarBlendask, + WriteFVarBlendask_rmb, + EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : WriteFVarBlendask, - WriteFVarBlendask_rmb, EVEX_V256; - defm Z128 : WriteFVarBlendask, - WriteFVarBlendask_rmb, EVEX_V128; + defm Z256 : WriteFVarBlendask, + WriteFVarBlendask_rmb, + EVEX_V256; + defm Z128 : WriteFVarBlendask, + WriteFVarBlendask_rmb, + EVEX_V128; } } -multiclass blendmask_bw opc, string OpcodeStr, - X86FoldableSchedWrite sched128, - X86FoldableSchedWrite sched256, +multiclass blendmask_bw opc, string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in - defm Z : WriteFVarBlendask, EVEX_V512; + defm Z : WriteFVarBlendask, + EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm Z256 : WriteFVarBlendask, EVEX_V256; - defm Z128 : WriteFVarBlendask, EVEX_V128; + defm Z256 : WriteFVarBlendask, + EVEX_V256; + defm Z128 : WriteFVarBlendask, + EVEX_V128; } } -defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", WriteFVarBlend, WriteFVarBlendY, +defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, avx512vl_f32_info>; -defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", WriteFVarBlend, WriteFVarBlendY, +defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, avx512vl_f64_info>, VEX_W; -defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", WriteVarBlend, WriteVarBlend, +defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, avx512vl_i32_info>; 
-defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", WriteVarBlend, WriteVarBlend, +defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, avx512vl_i64_info>, VEX_W; -defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", WriteVarBlend, WriteVarBlend, +defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, avx512vl_i8_info>; -defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", WriteVarBlend, WriteVarBlend, +defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, avx512vl_i16_info>, VEX_W; //===----------------------------------------------------------------------===// @@ -5508,43 +5510,43 @@ multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in - defm Z: avx512_shift_rmi, - avx512_shift_rmbi, + avx512_shift_rmbi, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256: avx512_shift_rmi, - avx512_shift_rmbi, + avx512_shift_rmbi, EVEX_V256; - defm Z128: avx512_shift_rmi, - avx512_shift_rmbi, + avx512_shift_rmbi, EVEX_V128; } } multiclass avx512_shift_rmi_w opcw, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { let Predicates = [HasBWI] in defm WZ: avx512_shift_rmi, EVEX_V512, VEX_WIG; + sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; let Predicates = [HasVLX, HasBWI] in { defm WZ256: avx512_shift_rmi, EVEX_V256, VEX_WIG; + sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; defm WZ128: avx512_shift_rmi, EVEX_V128, VEX_WIG; + sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; } } multiclass avx512_shift_rmi_dq opcd, bits<8> opcq, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { defm D: avx512_shift_rmi_sizes, EVEX_CD8<32, CD8VF>; defm Q: avx512_shift_rmi_sizes, + SchedWriteVecShift>, avx512_shift_rmi_w<0x71, 
MRM2r, MRM2m, "vpsrlw", X86vsrli, - WriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, - WriteVecShift>, + SchedWriteVecShift>, avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, - WriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, - WriteVecShift>, + SchedWriteVecShift>, avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, - WriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, - WriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, - WriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, WriteVecShift>; defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, WriteVecShift>; @@ -5975,28 +5977,27 @@ } multiclass avx512_permil_vec_common OpcVar, - X86FoldableSchedWrite sched128, - X86FoldableSchedWrite sched256, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> { let Predicates = [HasAVX512] in { - defm Z : avx512_permil_vec, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_permil_vec, EVEX_V128; - defm Z256 : avx512_permil_vec, EVEX_V256; } } multiclass avx512_permil OpcImm, bits<8> OpcVar, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ - defm NAME: avx512_permil_vec_common; + defm NAME: avx512_permil_vec_common; defm NAME: avx512_shift_rmi_sizes, + X86VPermilpi, SchedWriteFShuffle, _>, EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; } @@ -6012,12 +6013,14 @@ 
//===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", - X86PShufd, WriteShuffle, avx512vl_i32_info>, + X86PShufd, SchedWriteShuffle, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", - X86PShufhw, WriteShuffle>, EVEX, AVX512XSIi8Base; + X86PShufhw, SchedWriteShuffle>, + EVEX, AVX512XSIi8Base; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", - X86PShuflw, WriteShuffle>, EVEX, AVX512XDIi8Base; + X86PShuflw, SchedWriteShuffle>, + EVEX, AVX512XDIi8Base; multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched> { Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -2332,41 +2332,41 @@ /// There are no patterns here because isel prefers integer versions for SSE2 /// and later. There are SSE1 v4f32 patterns later. 
multiclass sse12_fp_packed_logical opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PSY : sse12_fp_packed_logical_rm, PS, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PDY : sse12_fp_packed_logical_rm, PD, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PS : sse12_fp_packed_logical_rm, PS, VEX_4V, VEX_WIG; defm V#NAME#PD : sse12_fp_packed_logical_rm, PD, VEX_4V, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm, PS; defm PD : sse12_fp_packed_logical_rm, PD; } } -defm AND : sse12_fp_packed_logical<0x54, "and", and>; -defm OR : sse12_fp_packed_logical<0x56, "or", or>; -defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; +defm AND : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>; +defm OR : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>; +defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>; let isCommutable = 0 in - defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; + defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>; // If only AVX1 is supported, we need to handle integer operations with // floating point instructions since the integer versions aren't available. 
@@ -6053,42 +6053,42 @@ let Predicates = [HasAVX] in { defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32, VR128, loadv4f32, f128mem, 0, SSEPackedSingle, - WriteFBlend, BlendCommuteImm4>, + SchedWriteFBlend.XMM, BlendCommuteImm4>, VEX_4V, VEX_WIG; defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32, VR256, loadv8f32, f256mem, 0, SSEPackedSingle, - WriteFBlendY, BlendCommuteImm8>, + SchedWriteFBlend.YMM, BlendCommuteImm8>, VEX_4V, VEX_L, VEX_WIG; defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64, VR128, loadv2f64, f128mem, 0, SSEPackedDouble, - WriteFBlend, BlendCommuteImm2>, + SchedWriteFBlend.XMM, BlendCommuteImm2>, VEX_4V, VEX_WIG; defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64, VR256, loadv4f64, f256mem, 0, SSEPackedDouble, - WriteFBlendY, BlendCommuteImm4>, + SchedWriteFBlend.YMM, BlendCommuteImm4>, VEX_4V, VEX_L, VEX_WIG; defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16, VR128, loadv2i64, i128mem, 0, SSEPackedInt, - WriteBlend, BlendCommuteImm8>, + SchedWriteBlend.XMM, BlendCommuteImm8>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX2] in { defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16, VR256, loadv4i64, i256mem, 0, SSEPackedInt, - WriteBlend, BlendCommuteImm8>, + SchedWriteBlend.YMM, BlendCommuteImm8>, VEX_4V, VEX_L, VEX_WIG; } defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32, VR128, memopv4f32, f128mem, 1, SSEPackedSingle, - WriteFBlend, BlendCommuteImm4>; + SchedWriteFBlend.XMM, BlendCommuteImm4>; defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64, VR128, memopv2f64, f128mem, 1, SSEPackedDouble, - WriteFBlend, BlendCommuteImm2>; + SchedWriteFBlend.XMM, BlendCommuteImm2>; defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16, VR128, memopv2i64, i128mem, 1, SSEPackedInt, - WriteBlend, BlendCommuteImm8>; + SchedWriteBlend.XMM, BlendCommuteImm8>; // For insertion into the zero index (low half) of a 256-bit vector, 
it is // more efficient to generate a blend with immediate instead of an insert*128. @@ -6135,28 +6135,28 @@ let ExeDomain = SSEPackedDouble in { defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem, loadv2f64, int_x86_sse41_blendvpd, - WriteFVarBlend>; + SchedWriteFVarBlend.XMM>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem, loadv4f64, int_x86_avx_blendv_pd_256, - WriteFVarBlendY>, VEX_L; + SchedWriteFVarBlend.YMM>, VEX_L; } // ExeDomain = SSEPackedDouble let ExeDomain = SSEPackedSingle in { defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem, loadv4f32, int_x86_sse41_blendvps, - WriteFVarBlend>; + SchedWriteFVarBlend.XMM>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem, loadv8f32, int_x86_avx_blendv_ps_256, - WriteFVarBlendY>, VEX_L; + SchedWriteFVarBlend.YMM>, VEX_L; } // ExeDomain = SSEPackedSingle defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, loadv2i64, int_x86_sse41_pblendvb, - WriteVarBlend>; + SchedWriteVarBlend.XMM>; } let Predicates = [HasAVX2] in { defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem, loadv4i64, int_x86_avx2_pblendvb, - WriteVarBlend>, VEX_L; + SchedWriteVarBlend.YMM>, VEX_L; } let Predicates = [HasAVX] in { @@ -6265,12 +6265,12 @@ let ExeDomain = SSEPackedDouble in defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem, - int_x86_sse41_blendvpd, WriteFVarBlend>; + int_x86_sse41_blendvpd, SchedWriteFVarBlend.XMM>; let ExeDomain = SSEPackedSingle in defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem, - int_x86_sse41_blendvps, WriteFVarBlend>; + int_x86_sse41_blendvps, SchedWriteFVarBlend.XMM>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem, - int_x86_sse41_pblendvb, WriteVarBlend>; + int_x86_sse41_pblendvb, SchedWriteVarBlend.XMM>; // Aliases with the implicit xmm0 argument def : InstAlias<"blendvpd\t{$src2, 
$dst|$dst, $src2}", @@ -7120,18 +7120,18 @@ let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, loadv2i64, v4f32, v4i32, WriteFShuffle, - WriteFVarShuffle>; + SchedWriteFVarShuffle.XMM>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, loadv4i64, v8f32, v8i32, WriteFShuffle, - WriteFVarShuffleY>, VEX_L; + SchedWriteFVarShuffle.YMM>, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, loadv2i64, v2f64, v2i64, WriteFShuffle, - WriteFVarShuffle>; + SchedWriteFVarShuffle.XMM>; defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, loadv4i64, v4f64, v4i64, WriteFShuffle, - WriteFVarShuffleY>, VEX_L; + SchedWriteFVarShuffle.YMM>, VEX_L; } //===----------------------------------------------------------------------===// @@ -7307,11 +7307,12 @@ (commuteXForm imm:$src3))>; } -defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32, WriteBlend, - VR128, loadv2i64, i128mem, BlendCommuteImm4>; -defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32, WriteBlend, - VR256, loadv4i64, i256mem, BlendCommuteImm8>, - VEX_L; +defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32, + SchedWriteBlend.XMM, VR128, loadv2i64, i128mem, + BlendCommuteImm4>; +defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32, + SchedWriteBlend.YMM, VR256, loadv4i64, i256mem, + BlendCommuteImm8>, VEX_L; // For insertion into the zero index (low half) of a 256-bit vector, it is // more efficient to generate a blend with immediate instead of an insert*128. Index: llvm/trunk/lib/Target/X86/X86Schedule.td =================================================================== --- llvm/trunk/lib/Target/X86/X86Schedule.td +++ llvm/trunk/lib/Target/X86/X86Schedule.td @@ -38,6 +38,18 @@ } } +// Class that wraps X86FoldableSchedWrite for each vector width. 
+class X86SchedWriteWidths { + X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations. + X86FoldableSchedWrite MMX = sScl; // MMX operations. + X86FoldableSchedWrite XMM = s128; // XMM operations. + X86FoldableSchedWrite YMM = s256; // YMM operations. + X86FoldableSchedWrite ZMM = s512; // ZMM operations. +} + // Loads, stores, and moves, not folded with other operations. def WriteLoad : SchedWrite; def WriteStore : SchedWrite; @@ -185,6 +197,57 @@ // Nop, not very useful expect it provides a model for nops! def WriteNop : SchedWrite; +// Vector width wrappers. +def SchedWriteFAdd + : X86SchedWriteWidths; +def SchedWriteFCmp + : X86SchedWriteWidths; +def SchedWriteFMul + : X86SchedWriteWidths; +def SchedWriteFDiv + : X86SchedWriteWidths; +def SchedWriteFLogic + : X86SchedWriteWidths; + +def SchedWriteFShuffle + : X86SchedWriteWidths; +def SchedWriteFVarShuffle + : X86SchedWriteWidths; +def SchedWriteFBlend + : X86SchedWriteWidths; +def SchedWriteFVarBlend + : X86SchedWriteWidths; + +def SchedWriteVecALU + : X86SchedWriteWidths; +def SchedWriteVecLogic + : X86SchedWriteWidths; +def SchedWriteVecShift + : X86SchedWriteWidths; +def SchedWriteVecIMul + : X86SchedWriteWidths; +def SchedWritePMULLD + : X86SchedWriteWidths; + +def SchedWriteShuffle + : X86SchedWriteWidths; +def SchedWriteVarShuffle + : X86SchedWriteWidths; +def SchedWriteBlend + : X86SchedWriteWidths; +def SchedWriteVarBlend + : X86SchedWriteWidths; + //===----------------------------------------------------------------------===// // Generic Processor Scheduler Models.