Index: include/llvm/CodeGen/TargetSchedule.h =================================================================== --- include/llvm/CodeGen/TargetSchedule.h +++ include/llvm/CodeGen/TargetSchedule.h @@ -40,6 +40,8 @@ unsigned ResourceLCM; // Resource units per cycle. Latency normalization factor. unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const; + Optional<double> + getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc) const; public: TargetSchedModel() : SchedModel(MCSchedModel::GetDefaultSchedModel()) {} Index: lib/CodeGen/TargetSchedule.cpp =================================================================== --- lib/CodeGen/TargetSchedule.cpp +++ lib/CodeGen/TargetSchedule.cpp @@ -338,7 +338,7 @@ static Optional<double> getRThroughputFromItineraries(unsigned schedClass, - const InstrItineraryData *IID){ + const InstrItineraryData *IID) { double Unknown = std::numeric_limits<double>::infinity(); double Throughput = Unknown; @@ -351,28 +351,31 @@ Throughput = std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles); } + if (Throughput == Unknown) + return Optional<double>(); // We need reciprocal throughput that's why we return such value. 
return 1 / Throughput; } -static Optional<double> -getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, - const TargetSubtargetInfo *STI, - const MCSchedModel &SchedModel) { +Optional<double> TargetSchedModel::getRThroughputFromInstrSchedModel( + const MCSchedClassDesc *SCDesc) const { double Unknown = std::numeric_limits<double>::infinity(); double Throughput = Unknown; - - for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc), - *WEnd = STI->getWriteProcResEnd(SCDesc); - WPR != WEnd; ++WPR) { + const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc), + *WEnd = STI->getWriteProcResEnd(SCDesc); + if ((WPR == WEnd) && SCDesc->isValid()) + return computeInstrLatency(*SCDesc); + for (; WPR != WEnd; ++WPR) { unsigned Cycles = WPR->Cycles; if (!Cycles) - return Optional<double>(); + continue; unsigned NumUnits = SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits; Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles); } + if (Throughput == Unknown) + return Optional<double>(); // We need reciprocal throughput that's why we return such value. 
return 1 / Throughput; } @@ -383,8 +386,7 @@ return getRThroughputFromItineraries(MI->getDesc().getSchedClass(), getInstrItineraries()); if (hasInstrSchedModel()) - return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI, - SchedModel); + return getRThroughputFromInstrSchedModel(resolveSchedClass(MI)); return Optional<double>(); } @@ -396,7 +398,7 @@ if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); if (SCDesc->isValid() && !SCDesc->isVariant()) - return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel); + return getRThroughputFromInstrSchedModel(SCDesc); } return Optional<double>(); } Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -332,6 +332,18 @@ let ResourceCycles = [1, 1]; } +def WriteJVZEROALL: SchedWriteRes<[]> { + let Latency = 90; + let NumMicroOps = 73; +} +def : InstRW<[WriteJVZEROALL], (instregex "VZEROALL")>; + +def WriteJVZEROUPPER: SchedWriteRes<[]> { + let Latency = 46; + let NumMicroOps = 37; +} +def : InstRW<[WriteJVZEROUPPER], (instregex "VZEROUPPER")>; + // FIXME: pipe for system/microcode? 
def : WriteRes { let Latency = 100; } def : WriteRes { let Latency = 100; } Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -874,7 +874,7 @@ ; HASWELL: # BB#0: ; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [4:1.00] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: vzeroupper # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_extractf128: @@ -887,7 +887,7 @@ ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50] ; ZNVER1-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vzeroupper # sched: [46:46.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> %2 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> @@ -1458,7 +1458,7 @@ ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: vzeroupper # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movmskpd: @@ -1469,7 +1469,7 @@ ; ZNVER1-LABEL: test_movmskpd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.50] -; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vzeroupper # sched: [46:46.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ret i32 %1 @@ -1486,7 +1486,7 @@ ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: vzeroupper # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movmskps: @@ -1497,7 +1497,7 @@ ; ZNVER1-LABEL: test_movmskps: 
; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.50] -; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vzeroupper # sched: [46:46.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ret i32 %1 @@ -2509,7 +2509,7 @@ ; HASWELL-NEXT: setb %al # sched: [1:0.50] ; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: vzeroupper # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_testpd_ymm: @@ -2528,7 +2528,7 @@ ; ZNVER1-NEXT: setb %al # sched: [1:0.50] ; ZNVER1-NEXT: vtestpd (%rdi), %ymm0 # sched: [6:1.00] ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.50] -; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vzeroupper # sched: [46:46.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) %2 = load <4 x double>, <4 x double> *%a2, align 32 @@ -2600,7 +2600,7 @@ ; HASWELL-NEXT: setb %al # sched: [1:0.50] ; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: vzeroupper # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_testps_ymm: @@ -2619,7 +2619,7 @@ ; ZNVER1-NEXT: setb %al # sched: [1:0.50] ; ZNVER1-NEXT: vtestps (%rdi), %ymm0 # sched: [6:1.00] ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.50] -; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vzeroupper # sched: [46:46.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) %2 = load <8 x float>, <8 x float> *%a2, align 32 @@ -2845,17 +2845,17 @@ ; ; HASWELL-LABEL: test_zeroall: ; HASWELL: # BB#0: -; HASWELL-NEXT: vzeroall # sched: [1:0.00] +; HASWELL-NEXT: vzeroall # sched: [1:1.00] ; HASWELL-NEXT: retq 
# sched: [1:1.00] ; ; BTVER2-LABEL: test_zeroall: ; BTVER2: # BB#0: -; BTVER2-NEXT: vzeroall # sched: [?:0.000000e+00] +; BTVER2-NEXT: vzeroall # sched: [90:90.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_zeroall: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vzeroall # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vzeroall # sched: [90:90.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] call void @llvm.x86.avx.vzeroall() ret void @@ -2870,17 +2870,17 @@ ; ; HASWELL-LABEL: test_zeroupper: ; HASWELL: # BB#0: -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: vzeroupper # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_zeroupper: ; BTVER2: # BB#0: -; BTVER2-NEXT: vzeroupper # sched: [?:0.000000e+00] +; BTVER2-NEXT: vzeroupper # sched: [46:46.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_zeroupper: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vzeroupper # sched: [46:46.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] call void @llvm.x86.avx.vzeroupper() ret void