Index: lib/Target/X86/X86InstrFormats.td =================================================================== --- lib/Target/X86/X86InstrFormats.td +++ lib/Target/X86/X86InstrFormats.td @@ -199,7 +199,8 @@ class TAPD : TA { Prefix OpPrefix = PD; } class TAXD : TA { Prefix OpPrefix = XD; } class VEX { Encoding OpEnc = EncVEX; } -class VEX_W { bit hasVEX_WPrefix = 1; } +class VEX_W { bits<2> VEX_WPrefix = 1; } +class VEX_WIG { bits<2> VEX_WPrefix = 2; } class VEX_4V : VEX { bit hasVEX_4V = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } @@ -270,7 +271,7 @@ bit hasREPPrefix = 0; // Does this inst have a REP prefix? Encoding OpEnc = EncNormal; // Encoding used by this instruction bits<2> OpEncBits = OpEnc.Value; - bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field? + bits<2> VEX_WPrefix = 0; // VEX.W setting: 0 = not set, 1 = set (VEX_W), 2 = ignored (VEX_WIG) bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field? bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit @@ -317,7 +318,8 @@ let TSFlags{28-27} = ExeDomain.Value; let TSFlags{30-29} = OpEncBits; let TSFlags{38-31} = Opcode; - let TSFlags{39} = hasVEX_WPrefix; + // Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
+ let TSFlags{39} = VEX_WPrefix{0}; let TSFlags{40} = hasVEX_4V; let TSFlags{41} = hasVEX_L; let TSFlags{42} = hasEVEX_K; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -526,12 +526,12 @@ // AVX defm V#NAME : sse12_move_rr, - VEX_4V, VEX_LIG; + VEX_4V, VEX_LIG, VEX_WIG; def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>, - VEX, VEX_LIG, Sched<[WriteStore]>; + VEX, VEX_LIG, Sched<[WriteStore]>, VEX_WIG; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr, VEX, VEX_LIG, Sched<[WriteLoad]>; + IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>, VEX_WIG; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], @@ -785,29 +785,29 @@ let Predicates = [HasAVX, NoVLX] in { defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, - PS, VEX; + PS, VEX, VEX_WIG; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - PD, VEX; + PD, VEX, VEX_WIG; defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, - PS, VEX; + PS, VEX, VEX_WIG; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS>, - PD, VEX; + PD, VEX, VEX_WIG; defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, - PS, VEX, VEX_L; + PS, VEX, VEX_L, VEX_WIG; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - PD, VEX, VEX_L; + PD, VEX, VEX_L, VEX_WIG; defm VMOVUPSY : sse12_mov_packed<0x10, 
VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, - PS, VEX, VEX_L; + PS, VEX, VEX_L, VEX_WIG; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS>, - PD, VEX, VEX_L; + PD, VEX, VEX_L, VEX_WIG; } let Predicates = [UseSSE1] in { @@ -831,35 +831,35 @@ def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], - IIC_SSE_MOVA_P_MR>, VEX; + IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG; def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [(alignedstore (v2f64 VR128:$src), addr:$dst)], - IIC_SSE_MOVA_P_MR>, VEX; + IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG; def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v4f32 VR128:$src), addr:$dst)], - IIC_SSE_MOVU_P_MR>, VEX; + IIC_SSE_MOVU_P_MR>, VEX, VEX_WIG; def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], - IIC_SSE_MOVU_P_MR>, VEX; + IIC_SSE_MOVU_P_MR>, VEX, VEX_WIG; def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore256 (v8f32 VR256:$src), addr:$dst)], - IIC_SSE_MOVA_P_MR>, VEX, VEX_L; + IIC_SSE_MOVA_P_MR>, VEX, VEX_L, VEX_WIG; def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [(alignedstore256 (v4f64 VR256:$src), addr:$dst)], - IIC_SSE_MOVA_P_MR>, VEX, VEX_L; + IIC_SSE_MOVA_P_MR>, VEX, VEX_L, VEX_WIG; def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v8f32 VR256:$src), addr:$dst)], - IIC_SSE_MOVU_P_MR>, VEX, VEX_L; + IIC_SSE_MOVU_P_MR>, VEX, VEX_L, VEX_WIG; def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, 
VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], - IIC_SSE_MOVU_P_MR>, VEX, VEX_L; + IIC_SSE_MOVU_P_MR>, VEX, VEX_L, VEX_WIG; } // SchedRW // For disassembler @@ -868,35 +868,35 @@ def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG; def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG; def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; } // Aliases to help the assembler pick two byte VEX encodings by swapping the @@ -1106,7 +1106,7 @@ let Predicates = [UseAVX] in defm V#NAME : sse12_mov_hilo_packed_base, VEX_4V; + itin>, VEX_4V, VEX_WIG; let Constraints = "$src1 = $dst" in defm NAME : 
sse12_mov_hilo_packed_base, VEX; + IIC_SSE_MOV_LH>, VEX, VEX_WIG; def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", [(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOV_LH>, VEX; + IIC_SSE_MOV_LH>, VEX, VEX_WIG; }// UseAVX def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", @@ -1237,12 +1237,12 @@ [(store (f64 (extractelt (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), (bc_v2f64 (v4f32 VR128:$src))), - (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX, VEX_WIG; def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", [(store (f64 (extractelt (v2f64 (X86Unpckh VR128:$src, VR128:$src)), - (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX, VEX_WIG; } // UseAVX def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", @@ -1342,14 +1342,14 @@ [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V, Sched<[WriteFShuffle]>; + VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V, Sched<[WriteFShuffle]>; + VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1724,11 +1724,11 @@ defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64, "vcvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - PS, VEX, Requires<[HasAVX, NoVLX]>; + PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG; defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, 
loadv4i64, "vcvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>; + PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG; defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64, "cvtdq2ps\t{$src, $dst|$dst, $src}", @@ -1779,14 +1779,14 @@ (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2F]>; + Sched<[WriteCvtF2F]>, VEX_WIG; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2FLd, ReadAfterLd]>; + Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; } def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, @@ -1845,14 +1845,14 @@ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2F]>; + Sched<[WriteCvtF2F]>, VEX_WIG; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, - Sched<[WriteCvtF2FLd, ReadAfterLd]>; + Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; } def : Pat<(f64 (fpextend FR32:$src)), @@ -1998,22 +1998,22 @@ def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, 
Sched<[WriteCvtF2ILd]>; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], @@ -2034,7 +2034,7 @@ "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, - VEX, Sched<[WriteCvtF2I]>; + VEX, Sched<[WriteCvtF2I]>, VEX_WIG; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", @@ -2043,7 +2043,7 @@ "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX, - Sched<[WriteCvtF2ILd]>; + Sched<[WriteCvtF2ILd]>, VEX_WIG; def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", (VCVTPD2DQrm VR128:$dst, f128mem:$src), 0>; @@ -2052,12 +2052,12 @@ "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtF2I]>; + VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtF2ILd]>; + VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG; def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>; def : 
InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}", @@ -2082,23 +2082,23 @@ "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (fp_to_sint (v4f32 VR128:$src))))], - IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (fp_to_sint (loadv4f32 addr:$src))))], - IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (fp_to_sint (v8f32 VR256:$src))))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (fp_to_sint (loadv8f32 addr:$src))))], IIC_SSE_CVT_PS_RM>, VEX, VEX_L, - Sched<[WriteCvtF2ILd]>; + Sched<[WriteCvtF2ILd]>, VEX_WIG; } def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2117,7 +2117,7 @@ "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (v2f64 VR128:$src))))], - IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. 
@@ -2131,7 +2131,7 @@ "cvttpd2dq{x}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG; def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", (VCVTTPD2DQrm VR128:$dst, f128mem:$src), 0>; @@ -2141,12 +2141,12 @@ "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (fp_to_sint (v4f64 VR256:$src))))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (fp_to_sint (loadv4f64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG; } def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>; @@ -2192,19 +2192,19 @@ def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))], - IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>; + IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>, VEX_WIG; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>; + IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))], - IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>; + IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, 
$dst|$dst, $src}", [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; + IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG; } let Predicates = [UseSSE2] in { @@ -2225,22 +2225,22 @@ "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>, - VEX, Sched<[WriteCvtI2FLd]>; + VEX, Sched<[WriteCvtI2FLd]>, VEX_WIG; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>, - VEX, Sched<[WriteCvtI2F]>; + VEX, Sched<[WriteCvtI2F]>, VEX_WIG; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))))]>, - VEX, VEX_L, Sched<[WriteCvtI2FLd]>; + VEX, VEX_L, Sched<[WriteCvtI2FLd]>, VEX_WIG; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v4f64 (sint_to_fp (v4i32 VR128:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtI2F]>; + VEX, VEX_L, Sched<[WriteCvtI2F]>, VEX_WIG; } let hasSideEffects = 0, mayLoad = 1 in @@ -2275,7 +2275,7 @@ def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))], - IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>, VEX_WIG; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", @@ -2284,7 +2284,7 @@ def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps{x}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG; def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, 
$src}", (VCVTPD2PSrm VR128:$dst, f128mem:$src), 0>; @@ -2293,11 +2293,11 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (fpround VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG; } def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>; @@ -2370,12 +2370,12 @@ defm VCMPSS : sse12_cmp_scalar, XS, VEX_4V, VEX_LIG; + SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG, VEX_WIG; defm VCMPSD : sse12_cmp_scalar, // same latency as 32 bit compare - XD, VEX_4V, VEX_LIG; + XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { defm CMPSS : sse12_cmp_scalar, PS, VEX, VEX_LIG; + "ucomiss">, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, PD, VEX, VEX_LIG; + "ucomisd">, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss">, PS, VEX, VEX_LIG; + "comiss">, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd">, PD, VEX, VEX_LIG; + "comisd">, PD, VEX, VEX_LIG, VEX_WIG; } let isCodeGenOnly = 1 in { defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss">, PS, VEX; + sse_load_f32, "ucomiss">, PS, VEX, VEX_WIG; defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd">, PD, VEX; + sse_load_f64, "ucomisd">, PD, VEX, VEX_WIG; defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, 
ssmem, - sse_load_f32, "comiss">, PS, VEX; + sse_load_f32, "comiss">, PS, VEX, VEX_WIG; defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd">, PD, VEX; + sse_load_f64, "comisd">, PD, VEX, VEX_WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS; @@ -2542,11 +2542,11 @@ defm VCMPPS : sse12_cmp_packed, PS, VEX_4V; + SSEPackedSingle, i8immZExt5, loadv4f32>, PS, VEX_4V, VEX_WIG; defm VCMPPD : sse12_cmp_packed, PD, VEX_4V; + SSEPackedDouble, i8immZExt5, loadv2f64>, PD, VEX_4V, VEX_WIG; defm VCMPPSY : sse12_cmp_packed, PS, VEX_4V; + loadv4f32, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VSHUFPSY : sse12_shuffle, PS, VEX_4V, VEX_L; + loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VSHUFPD : sse12_shuffle, PD, VEX_4V; + loadv2f64, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VSHUFPDY : sse12_shuffle, PD, VEX_4V, VEX_L; + loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle, PS, VEX_4V; + SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V; + SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V; + SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V; + SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_L; + SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm 
VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_L; + SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; }// Predicates = [HasAVX, NoVLX] let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2788,13 +2788,13 @@ let Predicates = [HasAVX] in { defm VMOVMSKPS : sse12_extr_sign_mask, PS, VEX; + SSEPackedSingle>, PS, VEX, VEX_WIG; defm VMOVMSKPD : sse12_extr_sign_mask, PD, VEX; + SSEPackedDouble>, PD, VEX, VEX_WIG; defm VMOVMSKPSY : sse12_extr_sign_mask, PS, VEX, VEX_L; + SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG; defm VMOVMSKPDY : sse12_extr_sign_mask, PD, VEX, VEX_L; + SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG; } defm MOVMSKPS : sse12_extr_sign_mask { let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rm, VEX_4V; + VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V, VEX_WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rm, VEX_4V, VEX_L; + IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG; } // These are ordered here for pattern ordering requirements with the fp versions @@ -2875,7 +2875,7 @@ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), (bc_v4i64 (v8f32 VR256:$src2))))], [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), - (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L; + (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PDY : sse12_fp_packed_logical_rm, - PD, VEX_4V, 
VEX_L; + PD, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PS : sse12_fp_packed_logical_rm, PS, VEX_4V; + (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_WIG; defm V#NAME#PD : sse12_fp_packed_logical_rm, - PD, VEX_4V; + PD, VEX_4V, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -3064,17 +3064,17 @@ let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed, PS, VEX_4V; + SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_WIG; defm V#NAME#PD : sse12_fp_packed, PD, VEX_4V; + SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_WIG; defm V#NAME#PSY : sse12_fp_packed, PS, VEX_4V, VEX_L; + SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PDY : sse12_fp_packed, PD, VEX_4V, VEX_L; + SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -3091,10 +3091,10 @@ SizeItins itins> { defm V#NAME#SS : sse12_fp_scalar, - XS, VEX_4V, VEX_LIG; + XS, VEX_4V, VEX_LIG, VEX_WIG; defm V#NAME#SD : sse12_fp_scalar, - XD, VEX_4V, VEX_LIG; + XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar { defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG; + SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V, VEX_LIG; + SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar_int, VEX, Sched<[itins.Sched]>; + itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG; def V#NAME#PSm : PSI, VEX, Sched<[itins.Sched.Folded]>; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>, VEX_WIG; def V#NAME#PSYr : PSI, VEX, VEX_L, Sched<[itins.Sched]>; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG; def V#NAME#PSYm : PSI, VEX, VEX_L, Sched<[itins.Sched.Folded]>; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>, VEX_WIG; } def PSr : PSI, VEX, Sched<[itins.Sched]>; + itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG; def V#NAME#PDm : PDI, VEX, Sched<[itins.Sched.Folded]>; + itins.rm>, VEX, 
Sched<[itins.Sched.Folded]>, VEX_WIG; def V#NAME#PDYr : PDI, VEX, VEX_L, Sched<[itins.Sched]>; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG; def V#NAME#PDYm : PDI, VEX, VEX_L, Sched<[itins.Sched.Folded]>; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>, VEX_WIG; } def PDr : PDI("int_x86_sse_"##OpcodeStr##_ss), OpNode, - SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG; + SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG; } multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, @@ -3578,7 +3578,7 @@ f64mem, !cast("int_x86_sse2_"##OpcodeStr##_sd), OpNode, SSEPackedDouble, itins, "SD">, - XD, VEX_4V, VEX_LIG; + XD, VEX_4V, VEX_LIG, VEX_WIG; } // Square root. @@ -3646,13 +3646,13 @@ "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX; + IIC_SSE_MOVNT>, VEX, VEX_WIG; def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX; + IIC_SSE_MOVNT>, VEX, VEX_WIG; let ExeDomain = SSEPackedInt in def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), @@ -3660,27 +3660,27 @@ "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX; + IIC_SSE_MOVNT>, VEX, VEX_WIG; def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v8f32 VR256:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; + IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG; def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; + IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG; let ExeDomain = SSEPackedInt in def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", 
[(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; + IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG; } def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), @@ -3797,10 +3797,10 @@ def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], - IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>; + IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>, VEX_WIG; def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], - IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>; + IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>, VEX_WIG; let Predicates = [UseSSE1] in { def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src), @@ -3820,16 +3820,16 @@ let hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, - VEX; + VEX, VEX_WIG; def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, - VEX, VEX_L; + VEX, VEX_L, VEX_WIG; def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, - VEX; + VEX, VEX_WIG; def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, - VEX, VEX_L; + VEX, VEX_L, VEX_WIG; } // For Disassembler @@ -3838,34 +3838,34 @@ def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, - VEX; + VEX, VEX_WIG; def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", 
[], IIC_SSE_MOVU_P_RR>, - VEX; + VEX, VEX_WIG; def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, hasSideEffects = 0, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, - VEX; + VEX, VEX_WIG; def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, - VEX, VEX_L; + VEX, VEX_L, VEX_WIG; let Predicates = [HasAVX] in { def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, - XS, VEX; + XS, VEX, VEX_WIG; def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, - XS, VEX, VEX_L; + XS, VEX, VEX_L, VEX_WIG; } } @@ -3873,18 +3873,18 @@ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, - VEX; + VEX, VEX_WIG; def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, - VEX, VEX_L; + VEX, VEX_L, VEX_WIG; let Predicates = [HasAVX] in { def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, - XS, VEX; + XS, VEX, VEX_WIG; def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, - XS, VEX, VEX_L; + XS, VEX, VEX_L, VEX_WIG; } } @@ -4036,12 +4036,12 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, - loadv2i64, i128mem, SSE_PMADD, 0>, 
VEX_4V; + loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, VR256, loadv4i64, i256mem, SSE_PMADD, - 0>, VEX_4V, VEX_L; + 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, memopv2i64, i128mem, SSE_PMADD>; @@ -4049,11 +4049,11 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>, - VEX_4V; + VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 0>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P>; @@ -4061,11 +4061,11 @@ let Predicates = [HasAVX, NoVLX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>, - VEX_4V; + VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX] in defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, VR256, loadv4i64, i256mem, - SSE_INTMUL_ITINS_P, 0>, VEX_4V, VEX_L; + SSE_INTMUL_ITINS_P, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P>; @@ -4112,11 +4112,11 @@ let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rmi, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rmi, VEX_4V, VEX_L; + loadv2i64, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rmi; @@ -4137,10 +4137,10 @@ SDNode OpNode> { let Predicates = [HasAVX, 
NoVLX_Or_NoBWI] in defm V#NAME : PDI_binop_ri, VEX_4V; + VR128, v16i8, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm V#NAME#Y : PDI_binop_ri, VEX_4V, VEX_L; + VR256, v32i8, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_ri; } @@ -4201,7 +4201,7 @@ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>, VEX_WIG; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), !strconcat("v", OpcodeStr, @@ -4209,7 +4209,7 @@ [(set VR128:$dst, (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)), (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, - Sched<[WriteShuffleLd]>; + Sched<[WriteShuffleLd]>, VEX_WIG; } let Predicates = [HasAVX2, prd] in { @@ -4219,7 +4219,7 @@ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>, VEX_WIG; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, u8imm:$src2), !strconcat("v", OpcodeStr, @@ -4227,7 +4227,7 @@ [(set VR256:$dst, (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)), (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, - Sched<[WriteShuffleLd]>; + Sched<[WriteShuffleLd]>, VEX_WIG; } let Predicates = [UseSSE2] in { @@ -4363,24 +4363,24 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPACKUSDW : sse4_pack<0x2B, 
"vpackusdw", v8i16, v4i32, X86Packus, loadv2i64, 0>, VEX_4V; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus>, VEX_4V, VEX_L; } @@ -4442,44 +4442,44 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, - loadv2i64, 0>, VEX_4V; + loadv2i64, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHBW : 
sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -4564,14 +4564,14 @@ (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))], - IIC_SSE_MOVMSK>, VEX; + IIC_SSE_MOVMSK>, VEX, VEX_WIG; let Predicates = [HasAVX2] in { def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR256:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>, - VEX, VEX_L; + VEX, VEX_L, VEX_WIG; } def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), @@ -4592,13 +4592,13 @@ (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], - IIC_SSE_MASKMOV>, VEX; + IIC_SSE_MASKMOV>, VEX, VEX_WIG; let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], - IIC_SSE_MASKMOV>, VEX; + IIC_SSE_MASKMOV>, VEX, VEX_WIG; let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), @@ -4902,7 +4902,7 @@ "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 
(scalar_to_vector (loadi64 addr:$src))))]>, XS, - VEX, Requires<[UseAVX]>; + VEX, Requires<[UseAVX]>, VEX_WIG; def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4919,7 +4919,7 @@ "movq\t{$src, $dst|$dst, $src}", [(store (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, VEX_WIG; def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (extractelt (v2i64 VR128:$src), @@ -4931,7 +4931,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteVecLogic] in { def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX; + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX, VEX_WIG; def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>; } @@ -4977,7 +4977,7 @@ "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, - XS, VEX, Requires<[UseAVX]>; + XS, VEX, Requires<[UseAVX]>, VEX_WIG; let AddedComplexity = 15 in def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", @@ -5015,13 +5015,13 @@ let Predicates = [HasAVX, NoVLX] in { defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v4f32, VR128, loadv4f32, f128mem>, VEX; + v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG; defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v4f32, VR128, loadv4f32, f128mem>, VEX; + v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG; defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L; + v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG; defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, 
"vmovsldup", - v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L; + v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG; } defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, memopv4f32, f128mem>; @@ -5089,8 +5089,8 @@ } let Predicates = [HasAVX, NoVLX] in { - defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; - defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L; + defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX, VEX_WIG; + defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L, VEX_WIG; } defm MOVDDUP : sse3_replicate_dfp<"movddup">; @@ -5127,11 +5127,11 @@ let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX, VEX_WIG; def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, - VEX, VEX_L; + VEX, VEX_L, VEX_WIG; } def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", @@ -5165,15 +5165,15 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VADDSUBPS : sse3_addsub, XD, VEX_4V; + f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V, VEX_WIG; defm VADDSUBPSY : sse3_addsub, XD, VEX_4V, VEX_L; + f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : sse3_addsub, PD, VEX_4V; + f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V, VEX_WIG; defm VADDSUBPDY : sse3_addsub, PD, VEX_4V, VEX_L; + f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { @@ -5260,23 +5260,23 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, 
- X86fhadd, loadv4f32, 0>, VEX_4V; + X86fhadd, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - X86fhsub, loadv4f32, 0>, VEX_4V; + X86fhsub, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L; + X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L; + X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - X86fhadd, loadv2f64, 0>, VEX_4V; + X86fhadd, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - X86fhsub, loadv2f64, 0>, VEX_4V; + X86fhsub, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L; + X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L; + X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; } } @@ -5345,11 +5345,11 @@ def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>; let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { @@ -5370,11 +5370,11 @@ } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : 
SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L, VEX_WIG; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { @@ -5509,45 +5509,45 @@ let isCommutable = 0 in { defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8, VR128, loadv2i64, i128mem, - SSE_PSHUFB, 0>, VEX_4V; + SSE_PSHUFB, 0>, VEX_4V, VEX_WIG; defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16, v16i8, VR128, loadv2i64, i128mem, - SSE_PMADD, 0>, VEX_4V; + SSE_PMADD, 0>, VEX_4V, VEX_WIG; } defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16, VR128, loadv2i64, i128mem, - SSE_PMULHRSW, 0>, VEX_4V; + SSE_PMULHRSW, 0>, VEX_4V, VEX_WIG; } let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128, loadv2i64, i128mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_WIG; defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128, loadv2i64, i128mem, - SSE_PHADDSUBD, 0>, VEX_4V; + SSE_PHADDSUBD, 0>, VEX_4V, VEX_WIG; defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128, loadv2i64, i128mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_WIG; defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128, loadv2i64, i128mem, SSE_PHADDSUBD, 0>, VEX_4V; defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", int_x86_ssse3_psign_b_128, - SSE_PSIGN, loadv2i64, 0>, VEX_4V; + SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", 
int_x86_ssse3_psign_w_128, - SSE_PSIGN, loadv2i64, 0>, VEX_4V; + SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", int_x86_ssse3_psign_d_128, - SSE_PSIGN, loadv2i64, 0>, VEX_4V; + SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, - SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V; + SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, - SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V; + SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V, VEX_WIG; } } @@ -5555,42 +5555,42 @@ let isCommutable = 0 in { defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8, VR256, loadv4i64, i256mem, - SSE_PSHUFB, 0>, VEX_4V, VEX_L; + SSE_PSHUFB, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16, v32i8, VR256, loadv4i64, i256mem, - SSE_PMADD, 0>, VEX_4V, VEX_L; + SSE_PMADD, 0>, VEX_4V, VEX_L, VEX_WIG; } defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16, VR256, loadv4i64, i256mem, - SSE_PMULHRSW, 0>, VEX_4V, VEX_L; + SSE_PMULHRSW, 0>, VEX_4V, VEX_L, VEX_WIG; } let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16, VR256, loadv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256, loadv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16, VR256, loadv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256, loadv4i64, i256mem, SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPSIGNBY : SS3I_binop_rm_int_y<0x08, 
"vpsignb", int_x86_avx2_psign_b, - WriteVecALU>, VEX_4V, VEX_L; + WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGNWY : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w, - WriteVecALU>, VEX_4V, VEX_L; + WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGNDY : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d, - WriteVecALU>, VEX_4V, VEX_L; + WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw, - WriteVecALU>, VEX_4V, VEX_L; + WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw, - WriteVecALU>, VEX_4V, VEX_L; + WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; } } @@ -5668,9 +5668,9 @@ } let Predicates = [HasAVX] in - defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V; + defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2] in - defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L; + defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGNR : ssse3_palignr<"palignr">; @@ -5761,10 +5761,10 @@ defm NAME : SS41I_pmovx_rrrm; let Predicates = [HasAVX, prd] in defm V#NAME : SS41I_pmovx_rrrm, VEX; + VR128, VR128, AVXItins>, VEX, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : SS41I_pmovx_rrrm, VEX, VEX_L; + VR256, VR128, AVX2Itins>, VEX, VEX_L, VEX_WIG; } multiclass SS41I_pmovx_rm opc, string OpcodeStr, X86MemOperand MemOp, @@ -6122,7 +6122,7 @@ let ExeDomain = SSEPackedSingle in { let Predicates = [UseAVX] in - defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG; defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>; } @@ -6250,7 +6250,7 @@ let ExeDomain = SSEPackedSingle in { let Predicates = [UseAVX] in - defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; + defm VINSERTPS : 
SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V, VEX_WIG; let Constraints = "$src1 = $dst" in defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>; } @@ -6443,14 +6443,14 @@ defm VROUND : sse41_fp_unop_p<0x08, 0x09, "vround", f128mem, VR128, loadv4f32, loadv2f64, int_x86_sse41_round_ps, - int_x86_sse41_round_pd>, VEX; + int_x86_sse41_round_pd>, VEX, VEX_WIG; defm VROUNDY : sse41_fp_unop_p<0x08, 0x09, "vround", f256mem, VR256, loadv8f32, loadv4f64, int_x86_avx_round_ps_256, - int_x86_avx_round_pd_256>, VEX, VEX_L; + int_x86_avx_round_pd_256>, VEX, VEX_L, VEX_WIG; defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, - int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; + int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG, VEX_WIG; defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG; } @@ -6588,20 +6588,20 @@ def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, - Sched<[WriteVecLogic]>, VEX; + Sched<[WriteVecLogic]>, VEX, VEX_WIG; def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, - Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX; + Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_WIG; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, - Sched<[WriteVecLogic]>, VEX, VEX_L; + Sched<[WriteVecLogic]>, VEX, VEX_L, VEX_WIG; def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, - Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L; + Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L, VEX_WIG; } let Defs = [EFLAGS] in { @@ -6704,7 
+6704,7 @@ let Predicates = [HasAVX] in defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", int_x86_sse41_phminposuw, loadv2i64, - WriteVecIMul>, VEX; + WriteVecIMul>, VEX, VEX_WIG; defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", int_x86_sse41_phminposuw, memopv2i64, WriteVecIMul>; @@ -6760,65 +6760,65 @@ let Predicates = [HasAVX, NoVLX] in { defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32, VR128, loadv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, loadv4i64, 
i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32, VR256, loadv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -6846,18 +6846,18 @@ let Predicates = [HasAVX, NoVLX] in { defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>, - VEX_4V; + VEX_4V, VEX_WIG; defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V; + VEX_4V, VEX_WIG; } let Predicates = [HasAVX2] in { defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; } let 
Constraints = "$src1 = $dst" in { @@ -6927,52 +6927,52 @@ let isCommutable = 0 in { defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, VR128, loadv2i64, i128mem, 0, - DEFAULT_ITINS_MPSADSCHED>, VEX_4V; + DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_WIG; } let ExeDomain = SSEPackedSingle in { defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32, VR128, loadv4f32, f128mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_WIG; defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32, VR256, loadv8f32, f256mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L; + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64, VR128, loadv2f64, f128mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_WIG; defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64, VR256, loadv4f64, f256mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L; + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L, VEX_WIG; } defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16, VR128, loadv2i64, i128mem, 0, - DEFAULT_ITINS_BLENDSCHED>, VEX_4V; + DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, loadv4f32, f128mem, 0, - SSE_DPPS_ITINS>, VEX_4V; + SSE_DPPS_ITINS>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, VR128, loadv2f64, f128mem, 0, - SSE_DPPS_ITINS>, VEX_4V; + SSE_DPPS_ITINS>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, loadv8f32, i256mem, 0, - SSE_DPPS_ITINS>, VEX_4V, VEX_L; + SSE_DPPS_ITINS>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VMPSADBWY : 
SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, VR256, loadv4i64, i256mem, 0, - DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L; + DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L, VEX_WIG; } defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16, VR256, loadv4i64, i256mem, 0, - DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L; + DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -7212,12 +7212,12 @@ def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, - VEX; + VEX, VEX_WIG; let Predicates = [HasAVX2, NoVLX] in def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, - VEX, VEX_L; + VEX, VEX_L, VEX_WIG; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; @@ -7277,11 +7277,11 @@ let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, - loadv2i64, i128mem, 0>, VEX_4V; + loadv2i64, i128mem, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2] in defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, - loadv4i64, i256mem, 0>, VEX_4V, VEX_L; + loadv4i64, i256mem, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, @@ -7305,7 +7305,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128", loadv2i64>, - Requires<[HasAVX]>; + Requires<[HasAVX]>, VEX_WIG; defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128", memopv2i64>, Requires<[UseSSE42]>; } @@ -7379,7 +7379,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI", loadv2i64>, - 
Requires<[HasAVX]>; + Requires<[HasAVX]>, VEX_WIG; defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI", memopv2i64>, Requires<[UseSSE42]>; } @@ -7570,13 +7570,13 @@ // Perform One Round of an AES Encryption/Decryption Flow let Predicates = [HasAVX, HasAES] in { defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", - int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V; + int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", - int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V; + int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", - int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V; + int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", - int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V; + int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -7597,12 +7597,12 @@ "vaesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>, - VEX; + VEX, VEX_WIG; def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "vaesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>, - Sched<[WriteAESIMCLd]>, VEX; + Sched<[WriteAESIMCLd]>, VEX, VEX_WIG; } def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), @@ -7622,13 +7622,13 @@ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, - Sched<[WriteAESKeyGen]>, VEX; + Sched<[WriteAESKeyGen]>, VEX, VEX_WIG; def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>, - Sched<[WriteAESKeyGenLd]>, VEX; + Sched<[WriteAESKeyGenLd]>, VEX, 
VEX_WIG; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), @@ -7654,14 +7654,14 @@ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, - Sched<[WriteCLMul]>; + Sched<[WriteCLMul]>, VEX_WIG; def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, (loadv2i64 addr:$src2), imm:$src3))]>, - Sched<[WriteCLMulLd, ReadAfterLd]>; + Sched<[WriteCLMulLd, ReadAfterLd]>, VEX_WIG; // Carry-less Multiplication instructions let Constraints = "$src1 = $dst" in { @@ -8113,11 +8113,11 @@ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { // Zero All YMM registers def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", - [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>; + [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>, VEX_WIG; // Zero Upper bits of YMM registers def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", - [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>; + [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>, VEX_WIG; } //===----------------------------------------------------------------------===// Index: utils/TableGen/X86RecognizableInstr.h =================================================================== --- utils/TableGen/X86RecognizableInstr.h +++ utils/TableGen/X86RecognizableInstr.h @@ -55,8 +55,8 @@ bool HasREX_WPrefix; /// The hasVEX_4V field from the record bool HasVEX_4V; - /// The hasVEX_WPrefix field from the record - bool HasVEX_WPrefix; + /// The VEX_WPrefix field from the record + uint8_t VEX_WPrefix; /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set bool HasVEX_LPrefix; /// The ignoreVEX_L field from the record Index: 
utils/TableGen/X86RecognizableInstr.cpp =================================================================== --- utils/TableGen/X86RecognizableInstr.cpp +++ utils/TableGen/X86RecognizableInstr.cpp @@ -138,6 +138,10 @@ enum { AdSize16 = 1, AdSize32 = 2, AdSize64 = 3 }; + + enum { + VEX_W0 = 0, VEX_W1 = 1, VEX_WIG = 2 + }; } using namespace X86Disassembler; @@ -203,7 +207,7 @@ AdSize = byteFromRec(Rec, "AdSizeBits"); HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix"); HasVEX_4V = Rec->getValueAsBit("hasVEX_4V"); - HasVEX_WPrefix = Rec->getValueAsBit("hasVEX_WPrefix"); + VEX_WPrefix = byteFromRec(Rec,"VEX_WPrefix"); IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L"); HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2"); HasEVEX_K = Rec->getValueAsBit("hasEVEX_K"); @@ -280,7 +284,7 @@ llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled"); } // VEX_L & VEX_W - if (HasVEX_LPrefix && HasVEX_WPrefix) { + if (HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) { if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (OpPrefix == X86Local::XS) @@ -308,7 +312,7 @@ llvm_unreachable("Invalid prefix"); } } - else if (HasEVEX_L2Prefix && HasVEX_WPrefix) { + else if (HasEVEX_L2Prefix && VEX_WPrefix == X86Local::VEX_W1) { // EVEX_L2 & VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE); @@ -337,7 +341,7 @@ llvm_unreachable("Invalid prefix"); } } - else if (HasVEX_WPrefix) { + else if (VEX_WPrefix == X86Local::VEX_W1) { // VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_W_OPSIZE); @@ -363,7 +367,7 @@ insnContext = EVEX_KB(IC_EVEX); /// eof EVEX } else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) { - if (HasVEX_LPrefix && HasVEX_WPrefix) { + if (HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) { if (OpPrefix == X86Local::PD) insnContext = IC_VEX_L_W_OPSIZE; else if (OpPrefix == X86Local::XS) @@ -378,7 +382,7 @@ } } else if (OpPrefix == X86Local::PD && HasVEX_LPrefix) insnContext = 
IC_VEX_L_OPSIZE; - else if (OpPrefix == X86Local::PD && HasVEX_WPrefix) + else if (OpPrefix == X86Local::PD && VEX_WPrefix == X86Local::VEX_W1) insnContext = IC_VEX_W_OPSIZE; else if (OpPrefix == X86Local::PD) insnContext = IC_VEX_OPSIZE; @@ -386,11 +390,11 @@ insnContext = IC_VEX_L_XS; else if (HasVEX_LPrefix && OpPrefix == X86Local::XD) insnContext = IC_VEX_L_XD; - else if (HasVEX_WPrefix && OpPrefix == X86Local::XS) + else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::XS) insnContext = IC_VEX_W_XS; - else if (HasVEX_WPrefix && OpPrefix == X86Local::XD) + else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::XD) insnContext = IC_VEX_W_XD; - else if (HasVEX_WPrefix && OpPrefix == X86Local::PS) + else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::PS) insnContext = IC_VEX_W; else if (HasVEX_LPrefix && OpPrefix == X86Local::PS) insnContext = IC_VEX_L;