Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -353,9 +353,9 @@ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); OptSize = F.optForSize(); + ProfileSummaryInfo *PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (ProfileGuidedSectionPrefix) { - ProfileSummaryInfo *PSI = - getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (PSI->isFunctionHotInCallGraph(&F)) F.setSectionPrefix(".hot"); else if (PSI->isFunctionColdInCallGraph(&F)) @@ -364,7 +364,8 @@ /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. - if (!OptSize && TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI && + TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); BasicBlock* BB = &*F.begin(); Index: test/CodeGen/X86/bypass-slow-division-tune.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-tune.ll +++ test/CodeGen/X86/bypass-slow-division-tune.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom < %s | FileCheck -check-prefixes=ATOM,CHECK %s ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=REST,CHECK %s ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=REST,CHECK %s +; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=HUGEWS %s ; Verify that div32 is bypassed only for Atoms. 
define i32 @div32(i32 %a, i32 %b) { @@ -36,6 +37,15 @@ define i64 @div64_optsize(i64 %a, i64 %b) optsize { ; CHECK-LABEL: div64_optsize: ; CHECK-NOT: divl +; CHECK: ret + %div = sdiv i64 %a, %b + ret i64 %div +} + +define i64 @div64_hugews(i64 %a, i64 %b) { +; HUGEWS-LABEL: div64_hugews: +; HUGEWS-NOT: divl +; HUGEWS: ret %div = sdiv i64 %a, %b ret i64 %div } @@ -43,6 +53,7 @@ define i32 @div32_optsize(i32 %a, i32 %b) optsize { ; CHECK-LABEL: div32_optsize: ; CHECK-NOT: divb +; CHECK: ret %div = sdiv i32 %a, %b ret i32 %div } @@ -50,6 +61,23 @@ define i32 @div32_minsize(i32 %a, i32 %b) minsize { ; CHECK-LABEL: div32_minsize: ; CHECK-NOT: divb +; CHECK: ret %div = sdiv i32 %a, %b ret i32 %div } + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 1000, i32 1} +!13 = !{i32 999000, i64 1000, i32 3} +!14 = !{i32 999999, i64 5, i32 3}