Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -1725,12 +1725,12 @@ //===----------------------------------------------------------------------===// // -- VPERMI2 - 3 source operands form -- -let Sched = WriteFShuffle256 in +let Sched = WriteFVarShuffle256 in def AVX512_PERM2_F : OpndItins< IIC_SSE_SHUFP, IIC_SSE_SHUFP >; -let Sched = WriteShuffle256 in +let Sched = WriteVarShuffle256 in def AVX512_PERM2_I : OpndItins< IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI >; @@ -8969,7 +8969,7 @@ // // FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND? -let Sched = WriteShuffle256 in { +let Sched = WriteVarShuffle256 in { def AVX512_COMPRESS : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; Index: llvm/trunk/lib/Target/X86/X86InstrMMX.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrMMX.td +++ llvm/trunk/lib/Target/X86/X86InstrMMX.td @@ -74,11 +74,12 @@ def MMX_PCK_ITINS : OpndItins< IIC_MMX_PCK_RR, IIC_MMX_PCK_RM >; +} // Sched +let Sched = WriteVarShuffle in def MMX_PSHUF_ITINS : OpndItins< IIC_MMX_PSHUF, IIC_MMX_PSHUF >; -} // Sched let Sched = WriteCvtF2I in { def MMX_CVT_PD_ITINS : OpndItins< Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -5050,7 +5050,7 @@ IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; } -let Sched = WriteShuffle in +let Sched = WriteVarShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; @@ -7688,7 +7688,7 @@ // VPERMIL - Permute Single and Double Floating-Point Values // -let Sched = WriteFShuffle in +let Sched = WriteFVarShuffle in def AVX_VPERMILV : OpndItins< IIC_SSE_SHUFP, IIC_SSE_SHUFP >; @@ -7707,13 +7707,13 @@ (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V, - Sched<[WriteFShuffle]>; + Sched<[WriteFVarShuffle]>; def rm : AVX8I, VEX_4V, - Sched<[WriteFShuffleLd, ReadAfterLd]>; + Sched<[WriteFVarShuffleLd, ReadAfterLd]>; def ri : AVXAIi8; let ExeDomain = SSEPackedSingle in -defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256, +defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256, f256mem>; multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, Index: llvm/trunk/lib/Target/X86/X86InstrXOP.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrXOP.td +++ llvm/trunk/lib/Target/X86/X86InstrXOP.td @@ -279,7 +279,7 @@ [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 VR128:$src3))))]>, - XOP_4V, Sched<[WriteShuffle]>; + XOP_4V, Sched<[WriteVarShuffle]>; def rrm : IXOPi8Reg, - XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[WriteVarShuffleLd, ReadAfterLd, ReadAfterLd]>; def rmr : IXOPi8Reg, - XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd, + XOP_4V, Sched<[WriteVarShuffleLd, ReadAfterLd, // 128mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -307,7 +307,7 @@ (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W, Sched<[WriteShuffle]>, FoldGenData; + []>, XOP_4V, VEX_W, Sched<[WriteVarShuffle]>, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -367,7 +367,7 @@ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set RC:$dst, (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>, - Sched<[WriteFShuffle]>; + Sched<[WriteFVarShuffle]>; def rm : IXOP5, VEX_W, - Sched<[WriteFShuffleLd, ReadAfterLd, ReadAfterLd]>; + Sched<[WriteFVarShuffleLd, ReadAfterLd, ReadAfterLd]>; def mr : IXOP5, - Sched<[WriteFShuffleLd, ReadAfterLd, + Sched<[WriteFVarShuffleLd, ReadAfterLd, // fpmemop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC:$src3 @@ -395,7 +395,7 @@ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W, Sched<[WriteFShuffle]>, FoldGenData; + []>, VEX_W, Sched<[WriteFVarShuffle]>, FoldGenData; } let ExeDomain = SSEPackedDouble in { Index: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td +++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td @@ -162,6 +162,7 @@ defm : BWWriteResPair; // Floating point reciprocal square root estimate. defm : BWWriteResPair; // Fused Multiply Add. defm : BWWriteResPair; // Floating point vector shuffles. +defm : BWWriteResPair; // Floating point vector variable shuffles. defm : BWWriteResPair; // Floating point vector blends. defm : BWWriteResPair; // Fp vector variable blends. @@ -178,6 +179,7 @@ defm : BWWriteResPair; // Vector integer multiply. defm : BWWriteResPair; // PMULLD defm : BWWriteResPair; // Vector shuffles. +defm : BWWriteResPair; // Vector variable shuffles. defm : BWWriteResPair; // Vector blends. defm : BWWriteResPair; // Vector variable blends. defm : BWWriteResPair; // Vector MPSAD. @@ -288,7 +290,9 @@ // AVX2. defm : BWWriteResPair; // Fp 256-bit width vector shuffles. +defm : BWWriteResPair; // Fp 256-bit width vector variable shuffles. defm : BWWriteResPair; // 256-bit width vector shuffles. +defm : BWWriteResPair; // 256-bit width vector variable shuffles. defm : BWWriteResPair; // Variable vector shifts. // Old microcoded instructions that nobody use. @@ -366,7 +370,6 @@ "MMX_MOVD64to64rr", "MMX_MOVQ2DQrr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -404,9 +407,7 @@ "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)PMOVSXBDrr", "(V?)PMOVSXBQrr", "(V?)PMOVSXBWrr", @@ -419,7 +420,6 @@ "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFB(Y?)rr", "(V?)PSHUFD(Y?)ri", "(V?)PSHUFHW(Y?)ri", "(V?)PSHUFLW(Y?)ri", @@ -891,9 +891,7 @@ "VPBROADCASTW(Y?)rr", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", "VPERMPDYri", - "VPERMPSYrr", "VPERMQYri", "VPMOVSXBDYrr", "VPMOVSXBQYrr", Index: llvm/trunk/lib/Target/X86/X86SchedHaswell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedHaswell.td +++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td @@ -159,8 +159,10 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; // Vector integer operations. @@ -174,8 +176,10 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -724,7 +728,6 @@ "MMX_MOVD64to64rr", "MMX_MOVQ2DQrr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -762,9 +765,7 @@ "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)PMOVSXBDrr", "(V?)PMOVSXBQrr", "(V?)PMOVSXBWrr", @@ -777,7 +778,6 @@ "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFB(Y?)rr", "(V?)PSHUFD(Y?)ri", "(V?)PSHUFHW(Y?)ri", "(V?)PSHUFLW(Y?)ri", @@ -1780,9 +1780,7 @@ "VPBROADCASTWrr", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", "VPERMPDYri", - "VPERMPSYrr", "VPERMQYri", "VPMOVSXBDYrr", "VPMOVSXBQYrr", Index: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td +++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td @@ -148,6 +148,7 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -162,6 +163,7 @@ defm : SBWriteResPair; defm : SBWriteResPair; // TODO this is probably wrong for 256/512-bit for the "generic" model defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -275,7 +277,9 @@ // AVX2/FMA is not supported on that architecture, but we should define the basic // scheduling resources anyway. defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -352,9 +356,7 @@ "(V?)ORPS(Y?)rr", "VPERM2F128rr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)SHUFPD(Y?)rri", "(V?)SHUFPS(Y?)rri", "(V?)UNPCKHPD(Y?)rr", @@ -408,7 +410,6 @@ "MMX_PABSWrr", "MMX_PADDQirr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSIGNBrr", "MMX_PSIGNDrr", "MMX_PSIGNWrr", @@ -462,7 +463,6 @@ "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFBrr", "(V?)PSHUFDri", "(V?)PSHUFHWri", "(V?)PSHUFLWri", Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td @@ -159,6 +159,7 @@ defm : SKLWriteResPair; // Floating point reciprocal square root estimate. defm : SKLWriteResPair; // Fused Multiply Add. defm : SKLWriteResPair; // Floating point vector shuffles. +defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector blends. defm : SKLWriteResPair; // Fp vector variable blends. @@ -175,6 +176,7 @@ defm : SKLWriteResPair; // Vector integer multiply. defm : SKLWriteResPair; defm : SKLWriteResPair; // Vector shuffles. +defm : SKLWriteResPair; // Vector shuffles. defm : SKLWriteResPair; // Vector blends. defm : SKLWriteResPair; // Vector variable blends. defm : SKLWriteResPair; // Vector MPSAD. @@ -294,7 +296,9 @@ // AVX2. defm : SKLWriteResPair; // Fp 256-bit width vector shuffles. +defm : SKLWriteResPair; // Fp 256-bit width vector variable shuffles. defm : SKLWriteResPair; // 256-bit width vector shuffles. +defm : SKLWriteResPair; // 256-bit width vector variable shuffles. defm : SKLWriteResPair; // Variable vector shifts. // Old microcoded instructions that nobody use. @@ -367,7 +371,6 @@ "MMX_MOVD64rr", "MMX_MOVD64to64rr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -397,9 +400,7 @@ "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)PMOVSXBDrr", "(V?)PMOVSXBQrr", "(V?)PMOVSXBWrr", @@ -412,7 +413,6 @@ "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFB(Y?)rr", "(V?)PSHUFD(Y?)ri", "(V?)PSHUFHW(Y?)ri", "(V?)PSHUFLW(Y?)ri", @@ -884,9 +884,7 @@ "(V?)PCMPGTQ(Y?)rr", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", "VPERMPDYri", - "VPERMPSYrr", "VPERMQYri", "VPMOVSXBDYrr", "VPMOVSXBQYrr", Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td @@ -159,6 +159,7 @@ defm : SKXWriteResPair; // Floating point reciprocal square root estimate. defm : SKXWriteResPair; // Fused Multiply Add. defm : SKXWriteResPair; // Floating point vector shuffles. +defm : SKXWriteResPair; // Floating point vector variable shuffles. defm : SKXWriteResPair; // Floating point vector blends. defm : SKXWriteResPair; // Fp vector variable blends. @@ -175,6 +176,7 @@ defm : SKXWriteResPair; // Vector integer multiply. defm : SKXWriteResPair; // Vector integer multiply. defm : SKXWriteResPair; // Vector shuffles. +defm : SKXWriteResPair; // Vector variable shuffles. defm : SKXWriteResPair; // Vector blends. defm : SKXWriteResPair; // Vector variable blends. defm : SKXWriteResPair; // Vector MPSAD. @@ -294,7 +296,9 @@ // AVX2. defm : SKXWriteResPair; // Fp 256-bit width vector shuffles. +defm : SKXWriteResPair; // Fp 256-bit width vector variable shuffles. defm : SKXWriteResPair; // 256-bit width vector shuffles. +defm : SKXWriteResPair; // 256-bit width vector variable shuffles. defm : SKXWriteResPair; // Variable vector shifts. // Old microcoded instructions that nobody use. @@ -412,7 +416,6 @@ "MMX_MOVD64rr", "MMX_MOVD64to64rr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -447,7 +450,6 @@ "PMOVZXDQrr", "PMOVZXWDrr", "PMOVZXWQrr", - "PSHUFBrr", "PSHUFDri", "PSHUFHWri", "PSHUFLWri", @@ -530,25 +532,15 @@ "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPDYri", - "VPERMILPDYrr", "VPERMILPDZ128ri", - "VPERMILPDZ128rr", "VPERMILPDZ256ri", - "VPERMILPDZ256rr", "VPERMILPDZri", - "VPERMILPDZrr", "VPERMILPDri", - "VPERMILPDrr", "VPERMILPSYri", - "VPERMILPSYrr", "VPERMILPSZ128ri", - "VPERMILPSZ128rr", "VPERMILPSZ256ri", - "VPERMILPSZ256rr", "VPERMILPSZri", - "VPERMILPSZrr", "VPERMILPSri", - "VPERMILPSrr", "VPMOVSXBDrr", "VPMOVSXBQrr", "VPMOVSXBWrr", @@ -561,11 +553,6 @@ "VPMOVZXDQrr", "VPMOVZXWDrr", "VPMOVZXWQrr", - "VPSHUFBYrr", - "VPSHUFBZ128rr", - "VPSHUFBZ256rr", - "VPSHUFBZrr", - "VPSHUFBrr", "VPSHUFDYri", "VPSHUFDZ128ri", "VPSHUFDZ256ri", @@ -1859,46 +1846,12 @@ "VPCMPWZrri", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", - "VPERMDZ256rr", - "VPERMDZrr", - "VPERMI2D128rr", - "VPERMI2D256rr", - "VPERMI2Drr", - "VPERMI2PD128rr", - "VPERMI2PD256rr", - "VPERMI2PDrr", - "VPERMI2PS128rr", - "VPERMI2PS256rr", - "VPERMI2PSrr", - "VPERMI2Q128rr", - "VPERMI2Q256rr", - "VPERMI2Qrr", "VPERMPDYri", "VPERMPDZ256ri", - "VPERMPDZ256rr", "VPERMPDZri", - "VPERMPDZrr", - "VPERMPSYrr", - "VPERMPSZ256rr", - "VPERMPSZrr", "VPERMQYri", "VPERMQZ256ri", - "VPERMQZ256rr", "VPERMQZri", - "VPERMQZrr", - "VPERMT2D128rr", - "VPERMT2D256rr", - "VPERMT2Drr", - "VPERMT2PD128rr", - "VPERMT2PD256rr", - "VPERMT2PDrr", - "VPERMT2PS128rr", - "VPERMT2PS256rr", - "VPERMT2PSrr", - "VPERMT2Q128rr", - "VPERMT2Q256rr", - "VPERMT2Qrr", "VPMAXSQZ128rr", "VPMAXSQZ256rr", "VPMAXSQZrr", Index: llvm/trunk/lib/Target/X86/X86Schedule.td =================================================================== --- llvm/trunk/lib/Target/X86/X86Schedule.td +++ llvm/trunk/lib/Target/X86/X86Schedule.td @@ -87,6 +87,7 @@ defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. +defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. @@ -106,6 +107,7 @@ defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. defm WritePMULLD : X86SchedWritePair; // PMULLD defm WriteShuffle : X86SchedWritePair; // Vector shuffles. +defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. defm WriteBlend : X86SchedWritePair; // Vector blends. defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. @@ -150,7 +152,9 @@ // AVX2. defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles. +defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles. defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. +defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. // Old microcoded instructions that nobody use. Index: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td +++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td @@ -301,9 +301,11 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. //////////////////////////////////////////////////////////////////////////////// // Conversions. @@ -367,10 +369,12 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. //////////////////////////////////////////////////////////////////////////////// @@ -750,34 +754,6 @@ } def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>; -def JWritePSHUFB: SchedWriteRes<[JFPU01, JVALU]> { - let Latency = 2; - let ResourceCycles = [1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWritePSHUFB], (instrs MMX_PSHUFBrr, PSHUFBrr, VPSHUFBrr)>; - -def JWritePSHUFBLd: SchedWriteRes<[JLAGU, JFPU01, JVALU]> { - let Latency = 7; - let ResourceCycles = [1, 1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWritePSHUFBLd, ReadAfterLd], (instrs MMX_PSHUFBrm, PSHUFBrm, VPSHUFBrm)>; - -def JWriteVPERM: SchedWriteRes<[JFPU01, JFPX]> { - let Latency = 2; - let ResourceCycles = [1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteVPERM], (instrs VPERMILPDrr, VPERMILPSrr)>; - -def JWriteVPERMLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 7; - let ResourceCycles = [1, 1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteVPERMLd, ReadAfterLd], (instrs VPERMILPDrm, VPERMILPSrm)>; - def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> { let Latency = 3; let ResourceCycles = [2, 6]; Index: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td +++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td @@ -134,6 +134,7 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; // Vector integer operations. @@ -149,6 +150,7 @@ //defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -255,7 +257,9 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; Index: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td +++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td @@ -201,6 +201,7 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -219,8 +220,10 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; // FIXME defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; // Vector Shift Operations defm : ZnWriteResFpuPair; @@ -241,6 +244,7 @@ // Following instructions with latency=100 are microcoded. // We set long latency so as to block the entire pipeline. defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; //Microcoded Instructions let Latency = 100 in { Index: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll @@ -5131,8 +5131,8 @@ define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pshufb: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufb: Index: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll @@ -7589,7 +7589,7 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { ; GENERIC-LABEL: test_build_vec_v64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_build_vec_v64i1: Index: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -4535,7 +4535,7 @@ ; GENERIC-LABEL: test_masked_16xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4555,7 +4555,7 @@ ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask0: @@ -4572,7 +4572,7 @@ ; GENERIC-LABEL: test_masked_16xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4592,7 +4592,7 @@ ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask1: @@ -4609,7 +4609,7 @@ ; GENERIC-LABEL: test_masked_16xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4629,7 +4629,7 @@ ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask2: @@ -4659,7 +4659,7 @@ ; GENERIC-LABEL: test_masked_16xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4679,7 +4679,7 @@ ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask3: @@ -4713,7 +4713,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask0: @@ -4734,7 +4734,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0: @@ -4755,7 +4755,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask1: @@ -4776,7 +4776,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask1: @@ -4797,7 +4797,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask2: @@ -4818,7 +4818,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2: @@ -4855,7 +4855,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask3: @@ -4876,7 +4876,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3: @@ -4895,7 +4895,7 @@ define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { ; GENERIC-LABEL: test_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mask0: @@ -4909,7 +4909,7 @@ ; GENERIC-LABEL: test_masked_32xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4929,7 +4929,7 @@ ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask0: @@ -4946,7 +4946,7 @@ ; GENERIC-LABEL: test_masked_32xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4966,7 +4966,7 @@ ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask1: @@ -4983,7 +4983,7 @@ ; GENERIC-LABEL: test_masked_32xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5003,7 +5003,7 @@ ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask2: @@ -5019,7 +5019,7 @@ define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { ; GENERIC-LABEL: test_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mask3: @@ -5033,7 +5033,7 @@ ; GENERIC-LABEL: test_masked_32xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5053,7 +5053,7 @@ ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask3: @@ -5070,7 +5070,7 @@ ; GENERIC-LABEL: test_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mem_mask0: @@ -5087,7 +5087,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask0: @@ -5108,7 +5108,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0: @@ -5129,7 +5129,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask1: @@ -5150,7 +5150,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask1: @@ -5171,7 +5171,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask2: @@ -5192,7 +5192,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2: @@ -5212,7 +5212,7 @@ ; GENERIC-LABEL: test_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mem_mask3: @@ -5229,7 +5229,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask3: @@ -5250,7 +5250,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3: @@ -5269,7 +5269,7 @@ define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mask0: @@ -5283,7 +5283,7 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5303,7 +5303,7 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask0: @@ -5320,7 +5320,7 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5340,7 +5340,7 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask1: @@ -5357,7 +5357,7 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5377,7 +5377,7 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask2: @@ -5393,7 +5393,7 @@ define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mask3: @@ -5407,7 +5407,7 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5427,7 +5427,7 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask3: @@ -5444,7 +5444,7 @@ ; GENERIC-LABEL: test_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mem_mask0: @@ -5461,7 +5461,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask0: @@ -5482,7 +5482,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0: @@ -5503,7 +5503,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask1: @@ -5524,7 +5524,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1: @@ -5545,7 +5545,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask2: @@ -5566,7 +5566,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2: @@ -5586,7 +5586,7 @@ ; GENERIC-LABEL: test_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mem_mask3: @@ -5603,7 +5603,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask3: @@ -5624,7 +5624,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3: Index: llvm/trunk/test/CodeGen/X86/xop-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/xop-schedule.ll +++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll @@ -843,9 +843,9 @@ ; GENERIC-LABEL: test_vpperm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ;