Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -8014,8 +8014,8 @@ let Predicates = [HasAVX512] in defm VCVTPH2PSZ : avx512_cvtph2ps, - avx512_cvtph2ps_sae, + WriteCvtPH2PSZ>, + avx512_cvtph2ps_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { @@ -8068,8 +8068,8 @@ let Predicates = [HasAVX512] in { defm VCVTPS2PHZ : avx512_cvtps2ph, - avx512_cvtps2ph_sae, + WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, + avx512_cvtps2ph_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPS2PHZ256 : avx512_cvtps2ph; +// Unsupported. +let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +} // Unsupported + // Arithmetic. defm : BWWriteResPair; // Simple integer ALU op. defm : BWWriteResPair; // Integer ALU + flags op. @@ -212,22 +254,18 @@ //defm : BWWriteResPair; // Floating point division. defm : BWWriteResPair; // Floating point division (XMM). defm : BWWriteResPair; // Floating point division (YMM). 
-defm : BWWriteResPair; // Floating point division (ZMM). //defm : BWWriteResPair; // Floating point division. defm : BWWriteResPair; // Floating point division (XMM). defm : BWWriteResPair; // Floating point division (YMM). -defm : BWWriteResPair; // Floating point division (ZMM). defm : X86WriteRes; // Floating point square root. defm : X86WriteRes; defm : BWWriteResPair; // Floating point square root (XMM). defm : BWWriteResPair; // Floating point square root (YMM). -defm : BWWriteResPair; // Floating point square root (ZMM). defm : X86WriteRes; // Floating point double square root. defm : X86WriteRes; defm : BWWriteResPair; // Floating point double square root (XMM). defm : BWWriteResPair; // Floating point double square root (YMM). -defm : BWWriteResPair; // Floating point double square root (ZMM). defm : BWWriteResPair; // Floating point long double square root. defm : BWWriteResPair; // Floating point reciprocal estimate. @@ -1688,4 +1726,3 @@ def: InstRW<[WriteZero], (instrs CLC)>; } // SchedModel - Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -136,6 +136,48 @@ // the port to read all inputs. We don't model that. def : WriteRes; +// Unsupported. 
+let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +} // Unsupported + // Bit counts. defm : HWWriteResPair; defm : HWWriteResPair; @@ -203,11 +245,9 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -220,11 +260,9 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -96,6 +96,48 @@ // 2/3 cycle to recompute the address. def : WriteRes; +// Unsupported. 
+let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +} // Unsupported + def : WriteRes; def : WriteRes; def : WriteRes { let Latency = 5; } @@ -195,11 +237,9 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -212,11 +252,9 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -104,6 +104,48 @@ // 2/3/7 cycle to recompute the address. def : WriteRes; +// Unsupported. 
+let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +} // Unsupported + // Arithmetic. defm : SKLWriteResPair; // Simple integer ALU op. defm : SKLWriteResPair; // Integer ALU + flags op. @@ -208,20 +250,16 @@ defm : SKLWriteResPair; // Floating point division. //defm : SKLWriteResPair; // Floating point division (XMM). defm : SKLWriteResPair; // Floating point division (YMM). -defm : SKLWriteResPair; // Floating point division (ZMM). //defm : SKLWriteResPair; // Floating point double division. //defm : SKLWriteResPair; // Floating point double division (XMM). //defm : SKLWriteResPair; // Floating point double division (YMM). -defm : SKLWriteResPair; // Floating point double division (ZMM). defm : SKLWriteResPair; // Floating point square root. defm : SKLWriteResPair; // Floating point square root (XMM). defm : SKLWriteResPair; // Floating point square root (YMM). -defm : SKLWriteResPair; // Floating point square root (ZMM). defm : SKLWriteResPair; // Floating point double square root. defm : SKLWriteResPair; // Floating point double square root (XMM). 
defm : SKLWriteResPair; // Floating point double square root (YMM). -defm : SKLWriteResPair; // Floating point double square root (ZMM). defm : SKLWriteResPair; // Floating point long double square root. defm : SKLWriteResPair; // Floating point reciprocal estimate. Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -184,26 +184,32 @@ defm : SKXWriteResPair; // Floating point add/sub. defm : SKXWriteResPair; // Floating point add/sub (XMM). -defm : SKXWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : SKXWriteResPair; // Floating point add/sub (YMM). +defm : SKXWriteResPair; // Floating point add/sub (ZMM). defm : SKXWriteResPair; // Floating point double add/sub. defm : SKXWriteResPair; // Floating point double add/sub (XMM). -defm : SKXWriteResPair; // Floating point double add/sub (YMM/ZMM). +defm : SKXWriteResPair; // Floating point double add/sub (YMM). +defm : SKXWriteResPair; // Floating point double add/sub (ZMM). defm : SKXWriteResPair; // Floating point compare. defm : SKXWriteResPair; // Floating point compare (XMM). -defm : SKXWriteResPair; // Floating point compare (YMM/ZMM). +defm : SKXWriteResPair; // Floating point compare (YMM). +defm : SKXWriteResPair; // Floating point compare (ZMM). defm : SKXWriteResPair; // Floating point double compare. defm : SKXWriteResPair; // Floating point double compare (XMM). -defm : SKXWriteResPair; // Floating point double compare (YMM/ZMM). +defm : SKXWriteResPair; // Floating point double compare (YMM). +defm : SKXWriteResPair; // Floating point double compare (ZMM). defm : SKXWriteResPair; // Floating point compare to flags. defm : SKXWriteResPair; // Floating point multiplication. defm : SKXWriteResPair; // Floating point multiplication (XMM). -defm : SKXWriteResPair; // Floating point multiplication (YMM/ZMM). 
+defm : SKXWriteResPair; // Floating point multiplication (YMM). +defm : SKXWriteResPair; // Floating point multiplication (ZMM). defm : SKXWriteResPair; // Floating point double multiplication. defm : SKXWriteResPair; // Floating point double multiplication (XMM). -defm : SKXWriteResPair; // Floating point double multiplication (YMM/ZMM). +defm : SKXWriteResPair; // Floating point double multiplication (YMM). +defm : SKXWriteResPair; // Floating point double multiplication (ZMM). defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. //defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (XMM). @@ -226,33 +232,45 @@ defm : SKXWriteResPair; // Floating point reciprocal estimate. defm : SKXWriteResPair; // Floating point reciprocal estimate (XMM). -defm : SKXWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : SKXWriteResPair; // Floating point reciprocal estimate (YMM). +defm : SKXWriteResPair; // Floating point reciprocal estimate (ZMM). + defm : SKXWriteResPair; // Floating point reciprocal square root estimate. defm : SKXWriteResPair; // Floating point reciprocal square root estimate (XMM). -defm : SKXWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : SKXWriteResPair; // Floating point reciprocal square root estimate (YMM). +defm : SKXWriteResPair; // Floating point reciprocal square root estimate (ZMM). + defm : SKXWriteResPair; // Fused Multiply Add. defm : SKXWriteResPair; // Fused Multiply Add (XMM). -defm : SKXWriteResPair; // Fused Multiply Add (YMM/ZMM). +defm : SKXWriteResPair; // Fused Multiply Add (YMM). +defm : SKXWriteResPair; // Fused Multiply Add (ZMM). defm : SKXWriteResPair; // Floating point double dot product. defm : SKXWriteResPair; // Floating point single dot product. defm : SKXWriteResPair; // Floating point single dot product (YMM). defm : SKXWriteResPair; // Floating point fabs/fchs. defm : SKXWriteResPair; // Floating point rounding. 
-defm : SKXWriteResPair; // Floating point rounding (YMM/ZMM). +defm : SKXWriteResPair; // Floating point rounding (YMM). +defm : SKXWriteResPair; // Floating point rounding (ZMM). defm : SKXWriteResPair; // Floating point and/or/xor logicals. -defm : SKXWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). +defm : SKXWriteResPair; // Floating point and/or/xor logicals (YMM). +defm : SKXWriteResPair; // Floating point and/or/xor logicals (ZMM). defm : SKXWriteResPair; // Floating point TEST instructions. -defm : SKXWriteResPair; // Floating point TEST instructions (YMM/ZMM). +defm : SKXWriteResPair; // Floating point TEST instructions (YMM). +defm : SKXWriteResPair; // Floating point TEST instructions (ZMM). defm : SKXWriteResPair; // Floating point vector shuffles. -defm : SKXWriteResPair; // Floating point vector shuffles (YMM/ZMM). +defm : SKXWriteResPair; // Floating point vector shuffles (YMM). +defm : SKXWriteResPair; // Floating point vector shuffles (ZMM). defm : SKXWriteResPair; // Floating point vector variable shuffles. defm : SKXWriteResPair; // Floating point vector variable shuffles. +defm : SKXWriteResPair; // Floating point vector variable shuffles. defm : SKXWriteResPair; // Floating point vector blends. defm : SKXWriteResPair; // Floating point vector blends. +defm : SKXWriteResPair; // Floating point vector blends. defm : SKXWriteResPair; // Fp vector variable blends. defm : SKXWriteResPair; // Fp vector variable blends. +defm : SKXWriteResPair; // Fp vector variable blends. // FMA Scheduling helper class. // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -280,46 +298,61 @@ defm : SKXWriteResPair; // Vector integer ALU op, no logicals. defm : SKXWriteResPair; // Vector integer ALU op, no logicals (XMM). -defm : SKXWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). +defm : SKXWriteResPair; // Vector integer ALU op, no logicals (YMM). +defm : SKXWriteResPair; // Vector integer ALU op, no logicals (ZMM). 
defm : SKXWriteResPair; // Vector integer and/or/xor. defm : SKXWriteResPair; // Vector integer and/or/xor (XMM). -defm : SKXWriteResPair; // Vector integer and/or/xor (YMM/ZMM). +defm : SKXWriteResPair; // Vector integer and/or/xor (YMM). +defm : SKXWriteResPair; // Vector integer and/or/xor (ZMM). defm : SKXWriteResPair; // Vector integer TEST instructions. -defm : SKXWriteResPair; // Vector integer TEST instructions (YMM/ZMM). +defm : SKXWriteResPair; // Vector integer TEST instructions (YMM). +defm : SKXWriteResPair; // Vector integer TEST instructions (ZMM). defm : SKXWriteResPair; // Vector integer multiply. defm : SKXWriteResPair; // Vector integer multiply (XMM). -defm : SKXWriteResPair; // Vector integer multiply (YMM/ZMM). +defm : SKXWriteResPair; // Vector integer multiply (YMM). +defm : SKXWriteResPair; // Vector integer multiply (ZMM). defm : SKXWriteResPair; // Vector PMULLD. -defm : SKXWriteResPair; // Vector PMULLD (YMM/ZMM). +defm : SKXWriteResPair; // Vector PMULLD (YMM). +defm : SKXWriteResPair; // Vector PMULLD (ZMM). defm : SKXWriteResPair; // Vector shuffles. defm : SKXWriteResPair; // Vector shuffles (XMM). -defm : SKXWriteResPair; // Vector shuffles (YMM/ZMM). +defm : SKXWriteResPair; // Vector shuffles (YMM). +defm : SKXWriteResPair; // Vector shuffles (ZMM). defm : SKXWriteResPair; // Vector variable shuffles. defm : SKXWriteResPair; // Vector variable shuffles (XMM). -defm : SKXWriteResPair; // Vector variable shuffles (YMM/ZMM). +defm : SKXWriteResPair; // Vector variable shuffles (YMM). +defm : SKXWriteResPair; // Vector variable shuffles (ZMM). defm : SKXWriteResPair; // Vector blends. -defm : SKXWriteResPair; // Vector blends (YMM/ZMM). +defm : SKXWriteResPair; // Vector blends (YMM). +defm : SKXWriteResPair; // Vector blends (ZMM). defm : SKXWriteResPair; // Vector variable blends. -defm : SKXWriteResPair; // Vector variable blends (YMM/ZMM). +defm : SKXWriteResPair; // Vector variable blends (YMM). 
+defm : SKXWriteResPair; // Vector variable blends (ZMM). defm : SKXWriteResPair; // Vector MPSAD. defm : SKXWriteResPair; // Vector MPSAD. +defm : SKXWriteResPair; // Vector MPSAD. defm : SKXWriteResPair; // Vector PSADBW. defm : SKXWriteResPair; // Vector PSADBW. defm : SKXWriteResPair; // Vector PSADBW. +defm : SKXWriteResPair; // Vector PSADBW. defm : SKXWriteResPair; // Vector PHMINPOS. // Vector integer shifts. defm : SKXWriteResPair; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : SKXWriteResPair; defm : SKXWriteResPair; // Vector integer immediate shifts (XMM). -defm : SKXWriteResPair; // Vector integer immediate shifts (YMM/ZMM). +defm : SKXWriteResPair; // Vector integer immediate shifts (YMM). +defm : SKXWriteResPair; // Vector integer immediate shifts (ZMM). defm : SKXWriteResPair; // Variable vector shifts. -defm : SKXWriteResPair; // Variable vector shifts (YMM/ZMM). +defm : SKXWriteResPair; // Variable vector shifts (YMM). +defm : SKXWriteResPair; // Variable vector shifts (ZMM). // Vector insert/extract operations. 
def : WriteRes { @@ -346,33 +379,43 @@ defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; // Strings instructions. Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -163,23 +163,29 @@ defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM). -defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM). +defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM). +defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM). defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub. defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM). -defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM/ZMM). +defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM). +defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM). defm WriteFCmp : X86SchedWritePair; // Floating point compare. defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM). 
-defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM). +defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM). +defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM). defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare. defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM). -defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM/ZMM). +defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM). +defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM). defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. defm WriteFMul : X86SchedWritePair; // Floating point multiplication. defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM). -defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM). +defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM). +defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM). defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication. defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM). -defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM/ZMM). +defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM). +defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM). defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM). defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM). @@ -199,41 +205,54 @@ defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root. defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM). 
-defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM). +defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM). +defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM). defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM). -defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM). +defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM). defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM). -defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM). +defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM). +defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM). defm WriteDPPD : X86SchedWritePair; // Floating point double dot product. defm WriteDPPS : X86SchedWritePair; // Floating point single dot product. defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM). +defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM). defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. defm WriteFRnd : X86SchedWritePair; // Floating point rounding. -defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM/ZMM). +defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM). +defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM). defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. -defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM). +defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM). 
+defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM). defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions. -defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM/ZMM). +defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM). +defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM). defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. -defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM/ZMM). +defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM). +defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM). defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. -defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM). +defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM). +defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM). defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. -defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM/ZMM). +defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM). +defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM). defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. -defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM/ZMM). +defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM). +defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM). // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Horizontal Add/Sub (float and integer) defm WriteFHAdd : X86SchedWritePair; -defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM. 
+defm WriteFHAddY : X86SchedWritePair; +defm WriteFHAddZ : X86SchedWritePair; defm WritePHAdd : X86SchedWritePair; -defm WritePHAddX : X86SchedWritePair; // XMM. -defm WritePHAddY : X86SchedWritePair; // YMM/ZMM. +defm WritePHAddX : X86SchedWritePair; +defm WritePHAddY : X86SchedWritePair; +defm WritePHAddZ : X86SchedWritePair; // Vector integer operations. def WriteVecLoad : SchedWrite; @@ -258,38 +277,51 @@ defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM). -defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM). +defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM). +defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM). defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM). -defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM). +defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM). +defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM). defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions. -defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM/ZMM). +defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM). +defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM). defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default). defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM). -defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM/ZMM). +defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM). +defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM). 
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default). defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM). -defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM/ZMM). +defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM). +defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM). defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default). defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM). -defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM). +defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM). +defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM). defm WritePMULLD : X86SchedWritePair; // Vector PMULLD. -defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM). +defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM). +defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM). defm WriteShuffle : X86SchedWritePair; // Vector shuffles. defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM). -defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM). +defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM). +defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM). defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM). -defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM/ZMM). +defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM). +defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM). defm WriteBlend : X86SchedWritePair; // Vector blends. -defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM). +defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM). 
+defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM). defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. -defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM). +defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM). +defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM). defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM). -defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM). +defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM). +defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM). defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. -defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM). +defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM). +defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM). defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS. // Vector insert/extract operations. @@ -306,35 +338,44 @@ // Conversion between integer and float. defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer. defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM). -defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM/ZMM). +defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM). +defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM). defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer. defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM). -defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM/ZMM). +defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM). +defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM). defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double. defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM). -defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM/ZMM). 
+defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM). +defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM). defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float. defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM). -defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM/ZMM). +defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM). +defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM). defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion. defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM). -defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM). +defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM). +defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM). defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion. defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM). -defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM). +defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM). +defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM). defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion. -defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM). +defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM). +defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM). def WriteCvtPS2PH : SchedWrite; // // Float -> Half size conversion. -def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM/ZMM). +def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM). +def WriteCvtPS2PHZ : SchedWrite; // // Float -> Half size conversion (ZMM). 
def WriteCvtPS2PHSt : SchedWrite; // // Float -> Half + store size conversion. -def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM/ZMM). +def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM). +def WriteCvtPS2PHZSt : SchedWrite; // // Float -> Half + store size conversion (ZMM). // CRC32 instruction. defm WriteCRC32 : X86SchedWritePair; @@ -373,7 +414,8 @@ defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. -defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM/ZMM). +defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM). +defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM). // Old microcoded instructions that nobody use. def WriteMicrocoded : SchedWrite; @@ -427,25 +469,25 @@ // Vector width wrappers. 
def SchedWriteFAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFAdd64 - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFHAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFCmp - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFCmp64 - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMul - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMul64 - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFMA - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteDPPD : X86SchedWriteWidths; def SchedWriteDPPS - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFDiv : X86SchedWriteWidths; def SchedWriteFDiv64 @@ -457,90 +499,90 @@ : X86SchedWriteWidths; def SchedWriteFRcp - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFRsqrt - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFRnd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFLogic - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFTest - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFShuffle : X86SchedWriteWidths; + WriteFShuffleY, WriteFShuffleZ>; def SchedWriteFVarShuffle : X86SchedWriteWidths; + WriteFVarShuffleY, WriteFVarShuffleZ>; def SchedWriteFBlend - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFVarBlend : X86SchedWriteWidths; + WriteFVarBlendY, WriteFVarBlendZ>; def SchedWriteCvtDQ2PD : X86SchedWriteWidths; + WriteCvtI2PDY, WriteCvtI2PDZ>; def SchedWriteCvtDQ2PS : X86SchedWriteWidths; + WriteCvtI2PSY, WriteCvtI2PSZ>; def SchedWriteCvtPD2DQ : X86SchedWriteWidths; + WriteCvtPD2IY, WriteCvtPD2IZ>; def SchedWriteCvtPS2DQ : X86SchedWriteWidths; + WriteCvtPS2IY, WriteCvtPS2IZ>; def SchedWriteCvtPS2PD : X86SchedWriteWidths; + WriteCvtPS2PDY, WriteCvtPS2PDZ>; def SchedWriteCvtPD2PS : X86SchedWriteWidths; + WriteCvtPD2PSY, WriteCvtPD2PSZ>; def SchedWriteVecALU - : X86SchedWriteWidths; + : 
X86SchedWriteWidths; def SchedWritePHAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteVecLogic : X86SchedWriteWidths; + WriteVecLogicY, WriteVecLogicZ>; def SchedWriteVecTest : X86SchedWriteWidths; + WriteVecTestY, WriteVecTestZ>; def SchedWriteVecShift : X86SchedWriteWidths; + WriteVecShiftY, WriteVecShiftZ>; def SchedWriteVecShiftImm : X86SchedWriteWidths; + WriteVecShiftImmY, WriteVecShiftImmZ>; def SchedWriteVarVecShift : X86SchedWriteWidths; + WriteVarVecShiftY, WriteVarVecShiftZ>; def SchedWriteVecIMul : X86SchedWriteWidths; + WriteVecIMulY, WriteVecIMulZ>; def SchedWritePMULLD : X86SchedWriteWidths; + WritePMULLDY, WritePMULLDZ>; def SchedWriteMPSAD : X86SchedWriteWidths; + WriteMPSADY, WriteMPSADZ>; def SchedWritePSADBW : X86SchedWriteWidths; + WritePSADBWY, WritePSADBWZ>; def SchedWriteShuffle : X86SchedWriteWidths; + WriteShuffleY, WriteShuffleZ>; def SchedWriteVarShuffle : X86SchedWriteWidths; + WriteVarShuffleY, WriteVarShuffleZ>; def SchedWriteBlend - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteVarBlend : X86SchedWriteWidths; + WriteVarBlendY, WriteVarBlendZ>; // Vector size wrappers. def SchedWriteFAddSizes @@ -598,4 +640,3 @@ def GenericPostRAModel : GenericX86Model { let PostRAScheduler = 1; } - Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -72,6 +72,51 @@ // A folded store needs a cycle on Port0 for the store data. def : WriteRes; +//////////////////////////////////////////////////////////////////////////////// +// Unsupported. 
+//////////////////////////////////////////////////////////////////////////////// + +let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +} // Unsupported + //////////////////////////////////////////////////////////////////////////////// // Arithmetic. 
//////////////////////////////////////////////////////////////////////////////// @@ -235,19 +280,15 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -151,6 +151,51 @@ // A folded store needs a cycle on the SAGU for the store data. def : WriteRes; +//////////////////////////////////////////////////////////////////////////////// +// Unsupported. +//////////////////////////////////////////////////////////////////////////////// + +let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : 
JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +} // Unsupported + //////////////////////////////////////////////////////////////////////////////// // Arithmetic. //////////////////////////////////////////////////////////////////////////////// @@ -329,19 +374,15 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; -defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; -defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; -defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; -defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -75,6 +75,47 @@ } } +let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +} // Unsupported + // A folded store needs a cycle on MEC_RSV for 
the store data, but it does not // need an extra port cycle to recompute the address. def : WriteRes; @@ -179,11 +220,9 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -193,11 +232,9 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -139,6 +139,47 @@ } } +let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +} // Unsupported + // WriteRMW is set for instructions with Memory write // operation in codegen def : WriteRes; @@ -243,11 +284,9 @@ defm : ZnWriteResFpuPair; defm : 
ZnWriteResFpuPair; //defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; //defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; // FIXME: Should folds require 1 extra uops? defm : ZnWriteResFpuPair; // FIXME: Should folds require 1 extra uops? @@ -277,11 +316,9 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; // Vector integer operations which uses FPU units Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -7,7 +7,7 @@ define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: addpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addpd512: @@ -22,7 +22,7 @@ define <8 x double> @addpd512fold(<8 x double> %y) { ; GENERIC-LABEL: addpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] +; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addpd512fold: @@ -37,7 +37,7 @@ define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: addps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addps512: @@ -52,7 +52,7 @@ define <16 x float> @addps512fold(<16 x float> %y) { ; GENERIC-LABEL: addps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] +; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addps512fold: @@ -67,7 +67,7 @@ define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: subpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: subpd512: @@ -82,7 +82,7 @@ define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { ; GENERIC-LABEL: subpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: subpd512fold: @@ -98,7 +98,7 @@ define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: subps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: subps512: @@ -113,7 +113,7 @@ define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { ; GENERIC-LABEL: subps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: subps512fold: @@ -129,7 +129,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; GENERIC-LABEL: imulq512: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: imulq512: @@ -171,7 +171,7 @@ define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: mulpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mulpd512: @@ -186,7 +186,7 @@ define <8 x double> @mulpd512fold(<8 x double> %y) { ; GENERIC-LABEL: mulpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] +; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mulpd512fold: @@ -201,7 +201,7 @@ define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: mulps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mulps512: @@ -216,7 +216,7 @@ define <16 x float> @mulps512fold(<16 x float> %y) { ; GENERIC-LABEL: mulps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] +; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mulps512fold: @@ -231,7 +231,7 @@ define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: divpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00] +; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: divpd512: @@ -246,7 +246,7 @@ define <8 x double> @divpd512fold(<8 x double> %y) { ; GENERIC-LABEL: divpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00] +; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: divpd512fold: @@ -261,7 +261,7 @@ define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: divps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00] +; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: divps512: @@ -276,7 +276,7 @@ define <16 x float> @divps512fold(<16 x float> %y) { ; GENERIC-LABEL: divps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00] +; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: divps512fold: @@ -291,7 +291,7 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { ; GENERIC-LABEL: vpaddq_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddq_test: @@ -305,7 +305,7 @@ define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { ; GENERIC-LABEL: vpaddq_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] +; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddq_fold_test: @@ -320,7 +320,7 @@ define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { ; GENERIC-LABEL: vpaddq_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddq_broadcast_test: @@ -334,7 +334,7 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { ; GENERIC-LABEL: vpaddq_broadcast2_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddq_broadcast2_test: @@ -357,7 +357,7 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { ; GENERIC-LABEL: vpaddd_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_test: @@ -371,7 +371,7 @@ define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { ; GENERIC-LABEL: vpaddd_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_fold_test: @@ -386,7 +386,7 @@ define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { ; GENERIC-LABEL: vpaddd_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_broadcast_test: @@ -400,8 +400,8 @@ define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_mask_test: @@ -418,8 +418,8 @@ define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_maskz_test: @@ -436,8 +436,8 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_mask_fold_test: @@ -455,8 +455,8 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_mask_broadcast_test: @@ -473,8 +473,8 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_maskz_fold_test: @@ -492,8 +492,8 @@ define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_maskz_broadcast_test: @@ -510,7 +510,7 @@ define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { ; GENERIC-LABEL: vpsubq_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpsubq_test: @@ -524,7 +524,7 @@ define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { ; GENERIC-LABEL: vpsubd_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpsubd_test: @@ -538,7 +538,7 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { ; GENERIC-LABEL: vpmulld_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpmulld_test: @@ -600,7 +600,7 @@ define <16 x float> @sqrtD(<16 x float> %a) nounwind { ; GENERIC-LABEL: sqrtD: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00] +; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sqrtD: @@ -615,7 +615,7 @@ define <8 x double> @sqrtE(<8 x double> %a) nounwind { ; GENERIC-LABEL: sqrtE: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00] +; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sqrtE: @@ -629,7 +629,7 @@ define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { ; GENERIC-LABEL: fadd_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] +; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fadd_broadcast: @@ -643,7 +643,7 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: addq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addq_broadcast: @@ -657,7 +657,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: orq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: orq_broadcast: @@ -671,7 +671,7 @@ define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; GENERIC-LABEL: andd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andd512fold: @@ -687,7 +687,7 @@ define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { ; GENERIC-LABEL: andqbrst: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andqbrst: @@ -705,8 +705,8 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, ; GENERIC-LABEL: test_mask_vaddps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vaddps: @@ -725,8 +725,8 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmulps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vmulps: @@ -743,8 +743,8 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vminps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vminps: @@ -763,7 +763,7 @@ ; GENERIC-LABEL: test_mask_vminpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vminpd: @@ -781,8 +781,8 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmaxps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vmaxps: @@ -801,7 +801,7 @@ ; GENERIC-LABEL: test_mask_vmaxpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vmaxpd: @@ -819,8 +819,8 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vsubps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vsubps: @@ -837,8 +837,8 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vdivps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vdivps: @@ -855,8 +855,8 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_vaddpd: @@ -873,8 +873,8 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { ; GENERIC-LABEL: test_maskz_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_maskz_vaddpd: @@ -891,8 +891,8 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_fold_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_fold_vaddpd: @@ -910,8 +910,8 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_maskz_fold_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_maskz_fold_vaddpd: @@ -929,7 +929,7 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind { ; GENERIC-LABEL: test_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [10:1.00] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_broadcast_vaddpd: @@ -947,8 +947,8 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -971,8 +971,8 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, ; GENERIC-LABEL: test_maskz_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_maskz_broadcast_vaddpd: @@ -994,7 +994,7 @@ define <16 x float> @test_fxor(<16 x float> %a) { ; GENERIC-LABEL: test_fxor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_fxor: @@ -1023,7 +1023,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) ; GENERIC-LABEL: fabs_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v8f64: @@ -1039,7 +1039,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) ; GENERIC-LABEL: fabs_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v16f32: @@ -1333,7 +1333,7 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind { ; GENERIC-LABEL: sitof32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sitof32: @@ -1347,7 +1347,7 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; GENERIC-LABEL: sltof864: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sltof864: @@ -1478,7 +1478,7 @@ define <8 x double> @ulto8f64(<8 x i64> %a) { ; GENERIC-LABEL: ulto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ulto8f64: @@ -1492,8 +1492,8 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; GENERIC-LABEL: ulto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ulto16f64: @@ -1508,7 +1508,7 @@ define <16 x i32> @f64to16si(<16 x float> %a) nounwind { ; GENERIC-LABEL: f64to16si: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to16si: @@ -1522,7 +1522,7 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { ; GENERIC-LABEL: f32to16ui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f32to16ui: @@ -1536,7 +1536,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { ; GENERIC-LABEL: f32to16uc: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1554,7 +1554,7 @@ define <16 x i16> @f32to16us(<16 x float> %f) { ; GENERIC-LABEL: f32to16us: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1598,7 +1598,7 @@ define <8 x i32> @f64to8ui(<8 x double> %a) nounwind { ; GENERIC-LABEL: f64to8ui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to8ui: @@ -1612,7 +1612,7 @@ define <8 x i16> @f64to8us(<8 x double> %f) { ; GENERIC-LABEL: f64to8us: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [0:?] ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1630,7 +1630,7 @@ define <8 x i8> @f64to8uc(<8 x double> %f) { ; GENERIC-LABEL: f64to8uc: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [0:?] 
; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1664,7 +1664,7 @@ define <8 x double> @sito8f64(<8 x i32> %a) { ; GENERIC-LABEL: sito8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sito8f64: @@ -1678,7 +1678,7 @@ ; GENERIC-LABEL: i32to8f64_mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: i32to8f64_mask: @@ -1700,7 +1700,7 @@ ; GENERIC-LABEL: sito8f64_maskz: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sito8f64_maskz: @@ -1722,7 +1722,7 @@ define <8 x i32> @f64to8si(<8 x double> %a) { ; GENERIC-LABEL: f64to8si: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to8si: @@ -1752,8 +1752,8 @@ define <16 x float> @f64to16f32(<16 x double> %b) nounwind { ; GENERIC-LABEL: f64to16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [0:?] +; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [0:?] 
; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1823,7 +1823,7 @@ define <8 x double> @f32to8f64(<8 x float> %b) nounwind { ; GENERIC-LABEL: f32to8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] +; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f32to8f64: @@ -2037,9 +2037,9 @@ define <16 x double> @uito16f64(<16 x i32> %a) nounwind { ; GENERIC-LABEL: uito16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00] +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [0:?] ; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [0:?] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2057,7 +2057,7 @@ define <8 x float> @slto8f32(<8 x i64> %a) { ; GENERIC-LABEL: slto8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: slto8f32: @@ -2071,8 +2071,8 @@ define <16 x float> @slto16f32(<16 x i64> %a) { ; GENERIC-LABEL: slto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00] +; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [0:?] +; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [0:?] ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2089,7 +2089,7 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; GENERIC-LABEL: slto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: slto8f64: @@ -2103,8 +2103,8 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; GENERIC-LABEL: slto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: slto16f64: @@ -2119,7 +2119,7 @@ define <8 x float> @ulto8f32(<8 x i64> %a) { ; GENERIC-LABEL: ulto8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ulto8f32: @@ -2133,8 +2133,8 @@ define <16 x float> @ulto16f32(<16 x i64> %a) { ; GENERIC-LABEL: ulto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00] +; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [0:?] +; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [0:?] ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2152,7 +2152,7 @@ ; GENERIC-LABEL: uito8f64_mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] +; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uito8f64_mask: @@ -2174,7 +2174,7 @@ ; GENERIC-LABEL: uito8f64_maskz: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uito8f64_maskz: @@ -2205,7 +2205,7 @@ define <16 x float> @uito16f32(<16 x i32> %a) nounwind { ; GENERIC-LABEL: uito16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uito16f32: @@ -2219,7 +2219,7 @@ define <8 x double> @uito8f64(<8 x i32> %a) { ; GENERIC-LABEL: uito8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uito8f64: @@ -2319,7 +2319,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sbto16f32: @@ -2337,7 +2337,7 @@ ; GENERIC-LABEL: scto16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: scto16f32: @@ -2353,7 +2353,7 @@ ; GENERIC-LABEL: ssto16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ssto16f32: @@ -2369,7 +2369,7 @@ ; GENERIC-LABEL: ssto16f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ssto16f64: @@ -2387,7 +2387,7 @@ ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] ; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: scto8f64: @@ -2405,9 +2405,9 @@ ; GENERIC-LABEL: scto16f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: scto16f64: @@ -2425,13 +2425,13 @@ ; GENERIC-LABEL: sbto16f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] -; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [0:?] +; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sbto16f64: @@ -2454,9 +2454,9 @@ ; GENERIC-LABEL: sbto8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [0:?] ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sbto8f64: @@ -2574,7 +2574,7 @@ ; GENERIC-LABEL: ucto16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ucto16f32: @@ -2591,7 +2591,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ucto8f64: @@ -2608,7 +2608,7 @@ ; GENERIC-LABEL: swto16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: swto16f32: @@ -2624,7 +2624,7 @@ ; GENERIC-LABEL: swto8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: swto8f64: @@ -2640,9 +2640,9 @@ ; GENERIC-LABEL: swto16f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: swto16f64: @@ -2660,9 +2660,9 @@ ; GENERIC-LABEL: ucto16f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ucto16f64: @@ -2680,7 +2680,7 @@ ; GENERIC-LABEL: uwto16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uwto16f32: @@ -2696,7 +2696,7 @@ ; GENERIC-LABEL: uwto8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uwto8f64: @@ -2712,9 +2712,9 @@ ; GENERIC-LABEL: uwto16f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: uwto16f64: @@ -2731,7 +2731,7 @@ define <16 x float> @sito16f32(<16 x i32> %a) { ; GENERIC-LABEL: sito16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sito16f32: @@ -2745,9 +2745,9 @@ define <16 x double> @sito16f64(<16 x i32> %a) { ; GENERIC-LABEL: sito16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [0:?] ; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [0:?] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2766,7 +2766,7 @@ ; GENERIC-LABEL: usto16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: usto16f32: @@ -2783,8 +2783,8 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto16f32: @@ -2804,10 +2804,10 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [0:?] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto16f64: @@ -2849,7 +2849,7 @@ ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto8f64: @@ -4216,7 +4216,7 @@ define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { ; GENERIC-LABEL: fptrunc_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fptrunc_test: @@ -4230,7 +4230,7 @@ define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { ; GENERIC-LABEL: fpext_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] +; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fpext_test: @@ -4246,7 +4246,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16i1_to_16xi32: @@ -4265,7 +4265,7 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8i1_to_8xi64: @@ -4303,7 +4303,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; GENERIC-LABEL: trunc_16i32_to_16i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax @@ -4439,7 +4439,7 @@ define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { ; GENERIC-LABEL: sext_16i1_16i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [0:?] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4493,7 +4493,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: test21: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [0:?] ; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00] @@ -4640,7 +4640,7 @@ define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { ; GENERIC-LABEL: zext_64xi1_to_64xi8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4657,9 +4657,9 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { ; GENERIC-LABEL: zext_32xi1_to_32xi16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [0:?] 
; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_32xi1_to_32xi16: @@ -4694,7 +4694,7 @@ define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { ; GENERIC-LABEL: zext_32xi1_to_32xi8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4757,8 +4757,8 @@ define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fmadd_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_x86_fmadd_ps_z: @@ -4774,8 +4774,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fmsub_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_x86_fmsub_ps_z: @@ -4791,8 +4791,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fnmadd_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [0:?] 
+; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_x86_fnmadd_ps_z: @@ -4808,9 +4808,9 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fnmsub_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_x86_fnmsub_ps_z: @@ -4831,8 +4831,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; GENERIC-LABEL: test_x86_fmadd_pd_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_x86_fmadd_pd_z: @@ -4848,8 +4848,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; GENERIC-LABEL: test_x86_fmsub_pd_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_x86_fmsub_pd_z: @@ -4918,8 +4918,8 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; GENERIC-LABEL: test231_br: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00] -; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test231_br: @@ -4935,8 +4935,8 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; GENERIC-LABEL: test213_br: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test213_br: @@ -4955,8 +4955,8 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00] -; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_x86_fmadd132_ps: @@ -4979,8 +4979,8 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00] -; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5005,8 +5005,8 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00] +; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [0:?] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5028,8 +5028,8 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandd: @@ -5048,8 +5048,8 @@ define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandnd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandnd: @@ -5070,8 +5070,8 @@ define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpord: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] 
+; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpord: @@ -5090,8 +5090,8 @@ define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpxord: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpxord: @@ -5110,8 +5110,8 @@ define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandq: @@ -5129,8 +5129,8 @@ define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandnq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandnq: @@ -5149,8 +5149,8 @@ define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vporq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] 
+; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vporq: @@ -5168,8 +5168,8 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpxorq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpxorq: @@ -5187,7 +5187,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: and_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: and_v64i8: @@ -5201,7 +5201,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: andn_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andn_v64i8: @@ -5219,7 +5219,7 @@ define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: or_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: or_v64i8: @@ -5233,7 +5233,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: xor_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: xor_v64i8: @@ -5247,7 +5247,7 @@ define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: and_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: and_v32i16: @@ -5261,7 +5261,7 @@ define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: andn_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andn_v32i16: @@ -5277,7 +5277,7 @@ define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: or_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: or_v32i16: @@ -5291,7 +5291,7 @@ define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: xor_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: xor_v32i16: @@ -5306,8 +5306,8 @@ ; GENERIC-LABEL: masked_and_v16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: masked_and_v16f32: @@ -5331,8 +5331,8 @@ ; GENERIC-LABEL: masked_or_v16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: masked_or_v16f32: @@ -5356,8 +5356,8 @@ ; GENERIC-LABEL: masked_xor_v16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [0:?] +; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: masked_xor_v16f32: @@ -5381,8 +5381,8 @@ ; GENERIC-LABEL: masked_and_v8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [0:?] +; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: masked_and_v8f64: @@ -5406,8 +5406,8 @@ ; GENERIC-LABEL: masked_or_v8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [0:?] +; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: masked_or_v8f64: @@ -5431,8 +5431,8 @@ ; GENERIC-LABEL: masked_xor_v8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [0:?] +; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: masked_xor_v8f64: @@ -5456,7 +5456,7 @@ ; GENERIC-LABEL: test_mm512_mask_and_epi32: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_and_epi32: @@ -5478,7 +5478,7 @@ ; GENERIC-LABEL: test_mm512_mask_or_epi32: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_or_epi32: @@ -5500,7 +5500,7 @@ ; GENERIC-LABEL: test_mm512_mask_xor_epi32: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_xor_epi32: @@ -5522,7 +5522,7 @@ ; GENERIC-LABEL: test_mm512_mask_xor_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_xor_pd: @@ -5544,7 +5544,7 @@ ; GENERIC-LABEL: test_mm512_maskz_xor_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_xor_pd: @@ -5566,7 +5566,7 @@ ; GENERIC-LABEL: test_mm512_mask_xor_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_xor_ps: @@ -5588,7 +5588,7 @@ ; GENERIC-LABEL: test_mm512_maskz_xor_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_xor_ps: @@ -5610,7 +5610,7 @@ ; GENERIC-LABEL: test_mm512_mask_or_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_or_pd: @@ -5632,7 +5632,7 @@ ; GENERIC-LABEL: test_mm512_maskz_or_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_or_pd: @@ -5654,7 +5654,7 @@ ; GENERIC-LABEL: test_mm512_mask_or_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_or_ps: @@ -5676,7 +5676,7 @@ ; GENERIC-LABEL: test_mm512_maskz_or_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_or_ps: @@ -5698,7 +5698,7 @@ ; GENERIC-LABEL: test_mm512_mask_and_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_and_pd: @@ -5720,7 +5720,7 @@ ; GENERIC-LABEL: test_mm512_maskz_and_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_and_pd: @@ -5742,7 +5742,7 @@ ; GENERIC-LABEL: test_mm512_mask_and_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_and_ps: @@ -5764,7 +5764,7 @@ ; GENERIC-LABEL: test_mm512_maskz_and_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_and_ps: @@ -5786,7 +5786,7 @@ ; GENERIC-LABEL: test_mm512_mask_andnot_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_andnot_pd: @@ -5809,7 +5809,7 @@ ; GENERIC-LABEL: test_mm512_maskz_andnot_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_andnot_pd: @@ -5832,7 +5832,7 @@ ; GENERIC-LABEL: test_mm512_mask_andnot_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_andnot_ps: @@ -5855,7 +5855,7 @@ ; GENERIC-LABEL: test_mm512_maskz_andnot_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] +; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_andnot_ps: @@ -6349,7 +6349,7 @@ define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6368,7 +6368,7 @@ define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test33: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6387,7 +6387,7 @@ define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test34: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6406,7 +6406,7 @@ define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test35: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6425,7 +6425,7 @@ define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test36: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6444,7 +6444,7 @@ define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test37: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6463,7 +6463,7 @@ define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test38: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6482,7 +6482,7 @@ define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test39: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6502,7 +6502,7 @@ ; GENERIC-LABEL: mov_test40: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6523,7 +6523,7 @@ ; GENERIC-LABEL: mov_test41: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6544,7 +6544,7 @@ ; GENERIC-LABEL: mov_test42: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6565,7 +6565,7 @@ ; GENERIC-LABEL: mov_test43: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6586,7 +6586,7 @@ ; GENERIC-LABEL: mov_test44: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6607,7 +6607,7 @@ ; GENERIC-LABEL: mov_test45: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6628,7 +6628,7 @@ ; GENERIC-LABEL: mov_test46: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [0:?] 
; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6649,7 +6649,7 @@ ; GENERIC-LABEL: mov_test47: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6867,7 +6867,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { ; GENERIC-LABEL: zext_test1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [0:?] ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] @@ -6891,7 +6891,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { ; GENERIC-LABEL: zext_test2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [0:?] ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] @@ -6917,7 +6917,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { ; GENERIC-LABEL: zext_test3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [0:?] ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] @@ -7059,7 +7059,7 @@ ; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: .LBB386_1: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50] +; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [0:?] 
; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7589,7 +7589,7 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { ; GENERIC-LABEL: test_build_vec_v64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_build_vec_v64i1: @@ -7604,9 +7604,9 @@ ; GENERIC-LABEL: ktest_1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [0:?] 
; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.1: # %L1 @@ -7667,13 +7667,13 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [7:0.50] ; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [7:0.50] -; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [0:?] ; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00] ; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50] ; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [0:?] ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] ; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:0.33] ; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] @@ -7919,7 +7919,7 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; GENERIC-LABEL: store_32i1_1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [0:?] ; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] @@ -7942,7 +7942,7 @@ ; ; GENERIC-LABEL: store_64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [0:?] 
; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] @@ -7962,7 +7962,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { ; GENERIC-LABEL: test_bitcast_v8i1_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [0:?] ; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] @@ -7986,7 +7986,7 @@ define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { ; GENERIC-LABEL: test_bitcast_v16i1_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [0:?] ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] @@ -8218,7 +8218,7 @@ define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8239,7 +8239,7 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_maskz: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8274,7 +8274,7 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_mask_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8294,7 +8294,7 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_maskz_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8454,7 +8454,7 @@ ; GENERIC-LABEL: test_vbroadcast: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] +; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [0:?] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] Index: test/CodeGen/X86/avx512-shuffle-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-shuffle-schedule.ll +++ test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -417,7 +417,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -439,7 +439,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -458,7 +458,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -480,7 +480,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -499,7 +499,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -521,7 +521,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -555,7 +555,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -577,7 +577,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -612,7 +612,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -633,7 +633,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -654,7 +654,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -675,7 +675,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -696,7 +696,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -717,7 +717,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -754,7 +754,7 @@ ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -775,7 +775,7 @@ ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1205,7 +1205,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1227,7 +1227,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1246,7 +1246,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1268,7 +1268,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1287,7 +1287,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1309,7 +1309,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1343,7 +1343,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1365,7 +1365,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1400,7 +1400,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1421,7 +1421,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1442,7 +1442,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1463,7 +1463,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1484,7 +1484,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1505,7 +1505,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1542,7 +1542,7 @@ ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1563,7 +1563,7 @@ ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1953,7 +1953,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1975,7 +1975,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1993,7 +1993,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2013,7 +2013,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2031,7 +2031,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2053,7 +2053,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2084,7 +2084,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2104,7 +2104,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2122,7 +2122,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2144,7 +2144,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2162,7 +2162,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2182,7 +2182,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2215,7 +2215,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2237,7 +2237,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2255,7 +2255,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2275,7 +2275,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2309,7 +2309,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2330,7 +2330,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2350,7 +2350,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2369,7 +2369,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2389,7 +2389,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2410,7 +2410,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2444,7 +2444,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2463,7 +2463,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2483,7 +2483,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2504,7 +2504,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2524,7 +2524,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2543,7 +2543,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2579,7 +2579,7 @@ ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2600,7 +2600,7 @@ ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2620,7 +2620,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2639,7 +2639,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2737,7 +2737,7 @@ ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3068,7 +3068,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3090,7 +3090,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3109,7 +3109,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3131,7 +3131,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3150,7 +3150,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3172,7 +3172,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3206,7 +3206,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3228,7 +3228,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3263,7 +3263,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3284,7 +3284,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3305,7 +3305,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3326,7 +3326,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3347,7 +3347,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3368,7 +3368,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3405,7 +3405,7 @@ ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3426,7 +3426,7 @@ ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3816,7 +3816,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3838,7 +3838,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3856,7 +3856,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3876,7 +3876,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3894,7 +3894,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3916,7 +3916,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3947,7 +3947,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3967,7 +3967,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3985,7 +3985,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4007,7 +4007,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4025,7 +4025,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4045,7 +4045,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4078,7 +4078,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4100,7 +4100,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4118,7 +4118,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4138,7 +4138,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4172,7 +4172,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4193,7 +4193,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4213,7 +4213,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4232,7 +4232,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4252,7 +4252,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4273,7 +4273,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4307,7 +4307,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4326,7 +4326,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4346,7 +4346,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4367,7 +4367,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4387,7 +4387,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4406,7 +4406,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4442,7 +4442,7 @@ ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4463,7 +4463,7 @@ ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4483,7 +4483,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4502,7 +4502,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [0:?] 
; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5269,7 +5269,7 @@ define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mask0: @@ -5282,8 +5282,8 @@ define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [0:?] 
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5302,8 +5302,8 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask0: @@ -5319,8 +5319,8 @@ define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [0:?] 
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5339,8 +5339,8 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask1: @@ -5356,8 +5356,8 @@ define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [0:?] 
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5376,8 +5376,8 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask2: @@ -5393,7 +5393,7 @@ define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mask3: @@ -5406,8 +5406,8 @@ define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5426,8 +5426,8 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask3: @@ -5444,7 +5444,7 @@ ; GENERIC-LABEL: test_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mem_mask0: @@ -5460,8 +5460,8 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask0: @@ -5481,8 +5481,8 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0: @@ -5502,8 +5502,8 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask1: @@ -5523,8 +5523,8 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1: @@ -5544,8 +5544,8 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask2: @@ -5565,8 +5565,8 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2: @@ -5586,7 +5586,7 @@ ; GENERIC-LABEL: test_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mem_mask3: @@ -5602,8 +5602,8 @@ ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask3: @@ -5623,8 +5623,8 @@ ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3: @@ -7005,7 +7005,7 @@ define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_high_mask0: @@ -7018,8 +7018,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7038,8 +7038,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask0: @@ -7055,8 +7055,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7075,8 +7075,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask1: @@ -7092,8 +7092,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7112,8 +7112,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask2: @@ -7129,7 +7129,7 @@ define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_low_mask3: @@ -7142,8 +7142,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7162,8 +7162,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask3: @@ -7179,8 +7179,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7199,8 +7199,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask4: @@ -7216,8 +7216,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7236,8 +7236,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask5: @@ -7253,7 +7253,7 @@ define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_high_mask6: @@ -7266,8 +7266,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7286,8 +7286,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask6: @@ -7303,8 +7303,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7323,8 +7323,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask7: @@ -7340,7 +7340,7 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_high_mem_mask0: @@ -7354,8 +7354,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask0: @@ -7373,8 +7373,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: @@ -7392,8 +7392,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask1: @@ -7411,8 +7411,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: @@ -7430,8 +7430,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask2: @@ -7449,8 +7449,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: @@ -7468,7 +7468,7 @@ define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_low_mem_mask3: @@ -7482,8 +7482,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask3: @@ -7501,8 +7501,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] 
+; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: @@ -7520,8 +7520,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask4: @@ -7539,8 +7539,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: @@ -7558,8 +7558,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [0:?] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7579,8 +7579,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [0:?] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] ; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7600,7 +7600,7 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_high_mem_mask6: @@ -7614,8 +7614,8 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask6: @@ -7633,8 +7633,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: @@ -7652,8 +7652,8 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask7: @@ -7671,8 +7671,8 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: @@ -8398,7 +8398,7 @@ define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) { ; GENERIC-LABEL: test2_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_16xi32_perm_mask0: @@ -8411,8 +8411,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8431,8 +8431,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask0: @@ -8448,8 +8448,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [0:?] 
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8468,8 +8468,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask1: @@ -8485,8 +8485,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8505,8 +8505,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask2: @@ -8522,7 +8522,7 @@ define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) { ; GENERIC-LABEL: test2_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_16xi32_perm_mask3: @@ -8535,8 +8535,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [0:?] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8555,8 +8555,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask3: @@ -8572,7 +8572,7 @@ define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; GENERIC-LABEL: test2_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_16xi32_perm_mem_mask0: @@ -8586,8 +8586,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask0: @@ -8605,8 +8605,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask0: @@ -8624,8 +8624,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask1: @@ -8643,8 +8643,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask1: @@ -8662,8 +8662,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask2: @@ -8681,8 +8681,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask2: @@ -8700,7 +8700,7 @@ define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; GENERIC-LABEL: test2_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_16xi32_perm_mem_mask3: @@ -8714,8 +8714,8 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask3: @@ -8733,8 +8733,8 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [0:?] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask3: @@ -9127,7 +9127,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9147,7 +9147,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9164,7 +9164,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9184,7 +9184,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9201,7 +9201,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9221,7 +9221,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9251,7 +9251,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9271,7 +9271,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9302,7 +9302,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9323,7 +9323,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9342,7 +9342,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9363,7 +9363,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9382,7 +9382,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9403,7 +9403,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9436,7 +9436,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9457,7 +9457,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9851,7 +9851,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9871,7 +9871,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9888,7 +9888,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9908,7 +9908,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9925,7 +9925,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9945,7 +9945,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9975,7 +9975,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9995,7 +9995,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10026,7 +10026,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10047,7 +10047,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10066,7 +10066,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10087,7 +10087,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10106,7 +10106,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10127,7 +10127,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10160,7 +10160,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10181,7 +10181,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10575,7 +10575,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10595,7 +10595,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10612,7 +10612,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10632,7 +10632,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10649,7 +10649,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10669,7 +10669,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10699,7 +10699,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10719,7 +10719,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10750,7 +10750,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10771,7 +10771,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10790,7 +10790,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10811,7 +10811,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10830,7 +10830,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10851,7 +10851,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10884,7 +10884,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10905,7 +10905,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11299,7 +11299,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11319,7 +11319,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11336,7 +11336,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11356,7 +11356,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11373,7 +11373,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11393,7 +11393,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11423,7 +11423,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11443,7 +11443,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11474,7 +11474,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11495,7 +11495,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11514,7 +11514,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11535,7 +11535,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11554,7 +11554,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11575,7 +11575,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11608,7 +11608,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11629,7 +11629,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] 
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12372,7 +12372,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_low_mask0: @@ -12385,8 +12385,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12405,8 +12405,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: @@ -12422,8 +12422,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12442,8 +12442,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: @@ -12459,8 +12459,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12479,8 +12479,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: @@ -12496,7 +12496,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_low_mask3: @@ -12509,8 +12509,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12529,8 +12529,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: @@ -12546,7 +12546,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0: @@ -12560,8 +12560,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12581,8 +12581,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: @@ -12600,8 +12600,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12621,8 +12621,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: @@ -12640,8 +12640,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12661,8 +12661,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: @@ -12680,7 +12680,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3: @@ -12694,8 +12694,8 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12715,8 +12715,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: @@ -13277,7 +13277,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_low_mask0: @@ -13290,8 +13290,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13310,8 +13310,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: @@ -13327,8 +13327,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13347,8 +13347,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: @@ -13364,8 +13364,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13384,8 +13384,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: @@ -13401,7 +13401,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_low_mask3: @@ -13414,8 +13414,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13434,8 +13434,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: @@ -13451,7 +13451,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0: @@ -13465,8 +13465,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13486,8 +13486,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: @@ -13505,8 +13505,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13526,8 +13526,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: @@ -13545,8 +13545,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13566,8 +13566,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: @@ -13585,7 +13585,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3: @@ -13599,8 +13599,8 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13620,8 +13620,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: @@ -14363,7 +14363,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_high_mask0: @@ -14376,8 +14376,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14396,8 +14396,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: @@ -14413,8 +14413,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14433,8 +14433,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: @@ -14450,8 +14450,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14470,8 +14470,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: @@ -14487,7 +14487,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_high_mask3: @@ -14500,8 +14500,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14520,8 +14520,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: @@ -14537,7 +14537,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0: @@ -14551,8 +14551,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14572,8 +14572,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: @@ -14591,8 +14591,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14612,8 +14612,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: @@ -14631,8 +14631,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] 
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14652,8 +14652,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: @@ -14671,7 +14671,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3: @@ -14685,8 +14685,8 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14706,8 +14706,8 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: @@ -15268,7 +15268,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_high_mask0: @@ -15281,8 +15281,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15301,8 +15301,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: @@ -15318,8 +15318,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15338,8 +15338,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: @@ -15355,8 +15355,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15375,8 +15375,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: @@ -15392,7 +15392,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_high_mask3: @@ -15405,8 +15405,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15425,8 +15425,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: @@ -15442,7 +15442,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0: @@ -15456,8 +15456,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15477,8 +15477,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: @@ -15496,8 +15496,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15517,8 +15517,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: @@ -15536,8 +15536,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15557,8 +15557,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: @@ -15576,7 +15576,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3: @@ -15590,8 +15590,8 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15611,8 +15611,8 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [0:?] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [0:?] 
; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: Index: test/CodeGen/X86/avx512vpopcntdq-schedule.ll =================================================================== --- test/CodeGen/X86/avx512vpopcntdq-schedule.ll +++ test/CodeGen/X86/avx512vpopcntdq-schedule.ll @@ -8,15 +8,15 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33] ; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50] +; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [0:?] +; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [0:?] +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [0:?] +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [0:?] +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [0:?] +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [0:?] 
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -46,15 +46,15 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33] ; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50] +; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [0:?] +; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [0:?] +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [0:?] +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [0:?] +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [0:?] +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [0:?] +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [0:?] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] Index: test/CodeGen/X86/recip-fastmath.ll =================================================================== --- test/CodeGen/X86/recip-fastmath.ll +++ test/CodeGen/X86/recip-fastmath.ll @@ -895,7 +895,7 @@ ; KNL-LABEL: v16f32_no_estimate: ; KNL: # %bb.0: ; KNL-NEXT: vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00] -; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [21:14.00] +; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [0:?] 
; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_no_estimate: @@ -1024,9 +1024,9 @@ ; ; KNL-LABEL: v16f32_one_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [0:?] +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [0:?] +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [0:?] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_one_step: @@ -1224,13 +1224,13 @@ ; ; KNL-LABEL: v16f32_two_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [0:?] ; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00] ; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50] +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [0:?] +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [0:?] +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [0:?] +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [0:?] 
; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_two_step: Index: test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- test/CodeGen/X86/recip-fastmath2.ll +++ test/CodeGen/X86/recip-fastmath2.ll @@ -1323,10 +1323,10 @@ ; ; KNL-LABEL: v16f32_one_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [0:?] +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [0:?] +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [0:?] +; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_one_step2: @@ -1489,11 +1489,11 @@ ; ; KNL-LABEL: v16f32_one_step_2_divs: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [12:0.50] -; KNL-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:0.50] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [0:?] +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [0:?] +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [0:?] +; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [0:?] +; KNL-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [0:?] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_one_step_2_divs: @@ -1709,14 +1709,14 @@ ; ; KNL-LABEL: v16f32_two_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [0:?] 
; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00] ; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [0:?] +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [0:?] +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [0:?] +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [0:?] +; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_two_step2: @@ -1781,7 +1781,7 @@ ; ; KNL-LABEL: v16f32_no_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [0:?] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_no_step: @@ -1855,8 +1855,8 @@ ; ; KNL-LABEL: v16f32_no_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] +; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [0:?] +; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [0:?] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_no_step2: