Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -4881,26 +4881,29 @@ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, Sched<[sched.Folded]>; } -// FIXME: YMM cases should use SchedWriteShuffle.YMM. + multiclass SS41I_pmovx_rm_all opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, - X86SchedWriteWidths sched, Predicate prd> { - defm NAME : SS41I_pmovx_rrrm; + Predicate prd> { + defm NAME : SS41I_pmovx_rrrm; let Predicates = [HasAVX, prd] in defm V#NAME : SS41I_pmovx_rrrm, VEX, VEX_WIG; + VR128, VR128, SchedWriteShuffle.XMM>, + VEX, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : SS41I_pmovx_rrrm, VEX, VEX_L, VEX_WIG; + VR256, VR128, WriteShuffle256>, + VEX, VEX_L, VEX_WIG; } multiclass SS41I_pmovx_rm opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, Predicate prd> { defm PMOVSX#NAME : SS41I_pmovx_rm_all; + MemOp, MemYOp, prd>; defm PMOVZX#NAME : SS41I_pmovx_rm_all; + MemOp, MemYOp, prd>; } defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>; Index: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td +++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td @@ -687,19 +687,7 @@ let ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr", - "VPBROADCASTWrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "VPBROADCASTWrr")>; def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> { let Latency = 2; Index: llvm/trunk/lib/Target/X86/X86SchedHaswell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedHaswell.td +++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td @@ -1287,19 +1287,7 @@ let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup51], (instregex "VPBROADCASTBrr", - "VPBROADCASTWrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "VPBROADCASTWrr")>; def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> { let Latency = 9; @@ -1320,17 +1308,6 @@ "VCVTPS2DQYrm", "VCVTTPS2DQYrm")>; -def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm", - "VPMOVZXBQYrm", - "VPMOVZXBWYrm", - "VPMOVZXDQYrm", - "VPMOVZXWQYrm")>; - def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 9; let NumMicroOps = 2; Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td @@ -731,19 +731,7 @@ "(ADD|SUB|SUBR)_FrST0", "VPBROADCASTBrr", "VPBROADCASTWrr", - "(V?)PCMPGTQ(Y?)rr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "(V?)PCMPGTQ(Y?)rr")>; def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 3; @@ -1558,12 +1546,7 @@ } def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m", - "VPCMPGTQYrm", - "VPMOVZXBDYrm", - "VPMOVZXBQYrm", - "VPMOVZXBWYrm", - "VPMOVZXDQYrm", - "VPMOVZXWQYrm")>; + "VPCMPGTQYrm")>; def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> { let Latency = 10; Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td @@ -1062,18 +1062,6 @@ "VPMINUQZ128rr", "VPMINUQZ256rr", "VPMINUQZrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr", "VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined. "VPTESTMBZ128rr", "VPTESTMBZ256rr", @@ -2603,11 +2591,6 @@ "VPMINSQZrm(b?)", "VPMINUQZ256rm(b?)", "VPMINUQZrm(b?)", - "VPMOVZXBDYrm", - "VPMOVZXBQYrm", - "VPMOVZXBWYrm", - "VPMOVZXDQYrm", - "VPMOVZXWQYrm", "VPTESTMBZ256rm(b?)", "VPTESTMBZrm(b?)", "VPTESTMDZ256rm(b?)", Index: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td +++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td @@ -940,15 +940,20 @@ let NumMicroOps = 2; } def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ; +def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> { + let Latency = 8; + let NumMicroOps = 2; +} def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr", "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>; def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm", "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>; -// VPMOVSX/ZX BW BD BQ DW DQ. +// VPMOVSX/ZX BW BD BQ WD WQ DQ. // y <- x. -def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>; def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ; def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> { Index: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll @@ -4138,7 +4138,7 @@ ; GENERIC-LABEL: test_pmovsxbd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4173,7 +4173,7 @@ ; ZNVER1-LABEL: test_pmovsxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> @@ -4189,7 +4189,7 @@ ; GENERIC-LABEL: test_pmovsxbq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4240,7 +4240,7 @@ ; GENERIC-LABEL: test_pmovsxbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4289,7 +4289,7 @@ ; GENERIC-LABEL: test_pmovsxdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4338,7 +4338,7 @@ ; GENERIC-LABEL: test_pmovsxwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4373,7 +4373,7 @@ ; ZNVER1-LABEL: test_pmovsxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = sext <8 x i16> %a0 to <8 x i32> @@ -4387,7 +4387,7 @@ ; GENERIC-LABEL: test_pmovsxwq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4422,7 +4422,7 @@ ; ZNVER1-LABEL: test_pmovsxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> @@ -4438,7 +4438,7 @@ ; GENERIC-LABEL: test_pmovzxbd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4473,7 +4473,7 @@ ; ZNVER1-LABEL: test_pmovzxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25] +; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> @@ -4489,7 +4489,7 @@ ; GENERIC-LABEL: test_pmovzxbq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4540,7 +4540,7 @@ ; GENERIC-LABEL: test_pmovzxbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4589,7 +4589,7 @@ ; GENERIC-LABEL: test_pmovzxdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4638,7 +4638,7 @@ ; GENERIC-LABEL: test_pmovzxwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4673,7 +4673,7 @@ ; ZNVER1-LABEL: test_pmovzxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] +; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = zext <8 x i16> %a0 to <8 x i32> @@ -4687,7 +4687,7 @@ ; GENERIC-LABEL: test_pmovzxwq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4722,7 +4722,7 @@ ; ZNVER1-LABEL: test_pmovzxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] +; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> Index: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll @@ -3487,7 +3487,7 @@ define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x8mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x64: @@ -3653,7 +3653,7 @@ define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x32: @@ -3883,7 +3883,7 @@ define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x16mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x64: @@ -4081,7 +4081,7 @@ define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x32mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x32mem_to_4x64: Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s @@ -620,30 +620,30 @@ # CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 2 2.00 vpmovmskb %ymm0, %ecx -# CHECK-NEXT: 1 1 0.25 vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxdq (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxwd (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxwq (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxdq (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxwd (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 1 4 1.00 vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 11 1.00 * vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmulhrsw %ymm0, %ymm1, %ymm2 @@ -773,7 +773,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 63.50 63.50 - - - - - 77.17 70.67 80.00 47.17 - +# CHECK-NEXT: 63.50 63.50 - - - - - 72.67 75.17 84.50 42.67 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -940,30 +940,30 @@ # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpminuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 2.00 - - vpmovmskb %ymm0, %ecx -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxdq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxdq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmulhrsw %ymm0, %ymm1, %ymm2