diff --git a/clang/test/CodeGen/pgo-sample-preparation.c b/clang/test/CodeGen/pgo-sample-preparation.c deleted file mode 100644 --- a/clang/test/CodeGen/pgo-sample-preparation.c +++ /dev/null @@ -1,16 +0,0 @@ -// Test if PGO sample use preparation passes are executed correctly. -// -// Ensure that instcombine is executed after simplifycfg and sroa so that -// "a < 255" will not be converted to a * 256 < 255 * 256. -// RUN: %clang_cc1 -O2 -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -emit-llvm -o - 2>&1 | FileCheck %s - -void bar(int); -void foo(int x, int y, int z) { - int m; - for (m = 0; m < x ; m++) { - int a = (((y >> 8) & 0xff) * z) / 256; - bar(a < 255 ? a : 255); - } -} - -// CHECK-NOT: icmp slt i32 %mul, 65280 diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -274,6 +274,10 @@ "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation")); +static cl::opt<bool> AlwaysAllowSimplifyCFGSpeculation( + "always-allow-simplify-cfg-speculation", cl::init(false), cl::Hidden, + cl::desc("Enable SimplifyCFG speculation even before profile annotation")); + PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; @@ -747,8 +751,13 @@ FunctionPassManager FPM; FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. - FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( - true))); // Merge & remove basic blocks. + FPM.addPass(SimplifyCFGPass( + SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) // Merge & remove basic blocks. + // Don't enable speculation before PGO annotation unless forced, + // since later invocations will utilize the profile for speculation + // decisions.
+ .speculateBlocks(AlwaysAllowSimplifyCFGSpeculation))); FPM.addPass(InstCombinePass()); // Combine silly sequences. invokePeepholeEPCallbacks(FPM, Level); @@ -985,6 +994,13 @@ HasSampleProfile && !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); + bool LoadFirstSampleProfile = + HasSampleProfile && Phase != ThinOrFullLTOPhase::ThinLTOPostLink; + + bool DoPGOInstr = PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && + (PGOOpt->Action == PGOOptions::IRInstr || + PGOOpt->Action == PGOOptions::IRUse); + // During the ThinLTO backend phase we perform early indirect call promotion // here, before globalopt. Otherwise imported available_externally functions // look unreferenced and are removed. If we are going to load the sample @@ -1016,7 +1032,11 @@ // Compare/branch metadata may alter the behavior of passes like // SimplifyCFG. EarlyFPM.addPass(LowerExpectIntrinsicPass()); - EarlyFPM.addPass(SimplifyCFGPass()); + // Don't enable speculation before PGO annotation unless forced, since later + // invocations will utilize the profile for speculation decisions. + EarlyFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().speculateBlocks( + (!DoPGOInstr && !LoadFirstSampleProfile) | + AlwaysAllowSimplifyCFGSpeculation))); EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG)); EarlyFPM.addPass(EarlyCSEPass()); if (Level == OptimizationLevel::O3) @@ -1083,8 +1103,13 @@ GlobalCleanupPM.addPass(PromotePass()); GlobalCleanupPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(GlobalCleanupPM, Level); - GlobalCleanupPM.addPass( - SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + // Don't enable speculation before PGO annotation unless forced, since later + // invocations will utilize the profile for speculation decisions. At this + // point SamplePGO annotation is done, so we don't need to disable it in that + // case. 
+ GlobalCleanupPM.addPass(SimplifyCFGPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true).speculateBlocks( + !DoPGOInstr | AlwaysAllowSimplifyCFGSpeculation))); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), PTO.EagerlyInvalidateAnalyses)); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6978,7 +6978,8 @@ // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + if (Options.SpeculateBlocks && + FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; @@ -7048,7 +7049,8 @@ // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. 
- if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + if (Options.SpeculateBlocks && + FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=10 | FileCheck %s +; RUN: opt < %s -S -passes='simplifycfg<no-speculate-blocks>' -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=10 | FileCheck %s --check-prefix=NOSPECULATE declare void @sideeffect0() declare void @sideeffect1() @@ -12,10 +13,12 @@ define void @one_pred(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred( +; NOSPECULATE-LABEL: @one_pred( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void @@ -42,6 +45,7 @@ define void @two_preds(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-LABEL: @two_preds( +; NOSPECULATE-LABEL: @two_preds( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -49,6 +53,7 @@ ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 ; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[V3:%.*]], 0 ; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[C1]], i1 true, i1 [[C3_OLD]] +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: pred1: ;
CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2:%.*]], 0 @@ -88,11 +93,13 @@ define void @one_pred_with_extra_op(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op( +; NOSPECULATE-LABEL: @one_pred_with_extra_op( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: [[V1_ADJ:%.*]] = add i8 [[V0]], [[V1:%.*]] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1_ADJ]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void @@ -123,11 +130,13 @@ ; function is speculatable, it can never cause UB. So, we need not technically drop it. define void @one_pred_with_spec_call(i8 %v0, i8 %v1, ptr %p) { ; CHECK-LABEL: @one_pred_with_spec_call( +; NOSPECULATE-LABEL: @one_pred_with_spec_call( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp ne ptr [[P:%.*]], null ; CHECK-NEXT: [[X:%.*]] = call i32 @speculate_call(ptr nonnull [[P]]) ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[COMMON_RET:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void @@ -155,11 +164,13 @@ ; Drop dereferenceable on the parameter define void @one_pred_with_spec_call_deref(i8 %v0, i8 %v1, ptr %p) { ; CHECK-LABEL: @one_pred_with_spec_call_deref( +; NOSPECULATE-LABEL: @one_pred_with_spec_call_deref( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp ne ptr [[P:%.*]], null ; CHECK-NEXT: [[X:%.*]] = call i32 @speculate_call(ptr [[P]]) ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[COMMON_RET:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void @@ -186,6 +197,7 @@ define void 
@two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-LABEL: @two_preds_with_extra_op( +; NOSPECULATE-LABEL: @two_preds_with_extra_op( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -194,6 +206,7 @@ ; CHECK-NEXT: [[V3_ADJ_OLD:%.*]] = add i8 [[V1]], [[V2:%.*]] ; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[V3_ADJ_OLD]], 0 ; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[C1]], i1 true, i1 [[C3_OLD]] +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: pred1: ; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2]], 0 @@ -235,12 +248,14 @@ define void @one_pred_with_extra_op_multiuse(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_multiuse( +; NOSPECULATE-LABEL: @one_pred_with_extra_op_multiuse( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: [[V1_ADJ:%.*]] = add i8 [[V0]], [[V1:%.*]] ; CHECK-NEXT: [[V1_ADJ_ADJ:%.*]] = add i8 [[V1_ADJ]], [[V1_ADJ]] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1_ADJ_ADJ]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void @@ -269,6 +284,7 @@ define void @two_preds_with_extra_op_multiuse(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-LABEL: @two_preds_with_extra_op_multiuse( +; NOSPECULATE-LABEL: @two_preds_with_extra_op_multiuse( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -278,6 +294,7 @@ ; CHECK-NEXT: [[V3_ADJ_ADJ_OLD:%.*]] = add i8 [[V3_ADJ_OLD]], [[V3_ADJ_OLD]] ; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[V3_ADJ_ADJ_OLD]], 0 ; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[C1]], i1 true, i1 [[C3_OLD]] +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND1]], label 
[[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: pred1: ; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2]], 0 @@ -322,6 +339,7 @@ define void @one_pred_with_extra_op_liveout(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_liveout( +; NOSPECULATE-LABEL: @one_pred_with_extra_op_liveout( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] @@ -356,6 +374,7 @@ } define void @one_pred_with_extra_op_liveout_multiuse(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_liveout_multiuse( +; NOSPECULATE-LABEL: @one_pred_with_extra_op_liveout_multiuse( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] @@ -393,6 +412,7 @@ define void @one_pred_with_extra_op_liveout_distant_phi(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_liveout_distant_phi( +; NOSPECULATE-LABEL: @one_pred_with_extra_op_liveout_distant_phi( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED:%.*]], label [[LEFT_END:%.*]] @@ -444,6 +464,7 @@ define void @two_preds_with_extra_op_liveout(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-LABEL: @two_preds_with_extra_op_liveout( +; NOSPECULATE-LABEL: @two_preds_with_extra_op_liveout( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -455,6 +476,7 @@ ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V1]], [[V2]] ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3_ADJ]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2]], i1 [[C3]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[V3_ADJ_OLD:%.*]] = add i8 [[V1]], [[V2]] @@ -496,6 +518,7 @@ define void @two_preds_with_extra_op_liveout_multiuse(i8 %v0, i8 %v1, i8 %v2, i8 
%v3) { ; CHECK-LABEL: @two_preds_with_extra_op_liveout_multiuse( +; NOSPECULATE-LABEL: @two_preds_with_extra_op_liveout_multiuse( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -507,6 +530,7 @@ ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V1]], [[V2]] ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3_ADJ]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2]], i1 [[C3]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[V3_ADJ_OLD:%.*]] = add i8 [[V1]], [[V2]] @@ -555,6 +579,7 @@ define void @one_pred_with_extra_op_eexternally_used_only(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_eexternally_used_only( +; NOSPECULATE-LABEL: @one_pred_with_extra_op_eexternally_used_only( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] @@ -589,6 +614,7 @@ } define void @one_pred_with_extra_op_externally_used_only_multiuse(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_with_extra_op_externally_used_only_multiuse( +; NOSPECULATE-LABEL: @one_pred_with_extra_op_externally_used_only_multiuse( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] @@ -626,6 +652,7 @@ define void @two_preds_with_extra_op_externally_used_only(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-LABEL: @two_preds_with_extra_op_externally_used_only( +; NOSPECULATE-LABEL: @two_preds_with_extra_op_externally_used_only( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -637,6 +664,7 @@ ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V1]], [[V2]] ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2]], i1 
[[C3]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[V3_ADJ_OLD:%.*]] = add i8 [[V1]], [[V2]] @@ -678,6 +706,7 @@ define void @two_preds_with_extra_op_externally_used_only_multiuse(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-LABEL: @two_preds_with_extra_op_externally_used_only_multiuse( +; NOSPECULATE-LABEL: @two_preds_with_extra_op_externally_used_only_multiuse( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -689,6 +718,7 @@ ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V1]], [[V2]] ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2]], i1 [[C3]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[V3_ADJ_OLD:%.*]] = add i8 [[V1]], [[V2]] @@ -735,6 +765,7 @@ ; The liveout instruction can be located after the branch condition. 
define void @one_pred_with_extra_op_externally_used_only_after_cond_distant_phi(i8 %v0, i8 %v1, i8 %v3, i8 %v4, i8 %v5) { ; CHECK-LABEL: @one_pred_with_extra_op_externally_used_only_after_cond_distant_phi( +; NOSPECULATE-LABEL: @one_pred_with_extra_op_externally_used_only_after_cond_distant_phi( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED:%.*]], label [[LEFT_END:%.*]] @@ -785,6 +816,7 @@ } define void @two_preds_with_extra_op_externally_used_only_after_cond(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8 %v4, i8 %v5) { ; CHECK-LABEL: @two_preds_with_extra_op_externally_used_only_after_cond( +; NOSPECULATE-LABEL: @two_preds_with_extra_op_externally_used_only_after_cond( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] @@ -796,6 +828,7 @@ ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3:%.*]], 0 ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V4:%.*]], [[V5:%.*]] ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2]], i1 [[C3]], i1 false +; NOSPECULATE-NOT: select ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[V3]], 0 @@ -837,6 +870,7 @@ define void @pr48450() { ; CHECK-LABEL: @pr48450( +; NOSPECULATE-LABEL: @pr48450( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -883,6 +917,7 @@ define void @pr48450_2(i1 %enable_loopback) { ; CHECK-LABEL: @pr48450_2( +; NOSPECULATE-LABEL: @pr48450_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -941,6 +976,7 @@ @f.b = external global i8, align 1 define void @pr48450_3() { ; CHECK-LABEL: @pr48450_3( +; NOSPECULATE-LABEL: @pr48450_3( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND1:%.*]] ; CHECK: for.cond1: @@ -996,6 +1032,7 @@ define void @pr49510() { ; CHECK-LABEL: @pr49510( +; NOSPECULATE-LABEL: @pr49510( ; CHECK-NEXT: entry: ; 
CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: @@ -1031,6 +1068,7 @@ define i32 @pr51125() { ; CHECK-LABEL: @pr51125( +; NOSPECULATE-LABEL: @pr51125( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[L:%.*]] ; CHECK: L: @@ -1066,6 +1104,7 @@ ; https://github.com/llvm/llvm-project/issues/53861 define i32 @firewall(ptr %data) { ; CHECK-LABEL: @firewall( +; NOSPECULATE-LABEL: @firewall( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I:%.*]] = load i8, ptr [[DATA:%.*]], align 1 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 64 @@ -1117,6 +1156,7 @@ define i32 @test_builtin_fpclassify(float %x) { ; CHECK-LABEL: @test_builtin_fpclassify( +; NOSPECULATE-LABEL: @test_builtin_fpclassify( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ISZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 ; CHECK-NEXT: br i1 [[ISZERO]], label [[FPCLASSIFY_END:%.*]], label [[FPCLASSIFY_NOT_ZERO:%.*]] diff --git a/llvm/test/Transforms/SimplifyCFG/pipeline-delay-speculation-pgo.ll b/llvm/test/Transforms/SimplifyCFG/pipeline-delay-speculation-pgo.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/pipeline-delay-speculation-pgo.ll @@ -0,0 +1,59 @@ +;; Test that the pipelines delay simplify CFG speculation until after +;; pgo annotation. + +; RUN: touch %t.profdata + +;; Test the default optimization pipeline with no PGO, instrumentation PGO and sample PGO. +;; The first should not disable simplify CFG speculation, the latter two should before +;; profile annotation. 
+; RUN: opt -passes='default<O2>' -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -passes='default<O2>' -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --check-prefixes=PGO,INSTRPGO +; RUN: opt -passes='default<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --check-prefixes=PGO,SAMPLEPGO + +;; Test the ThinLTO pre-link pipeline with no PGO, instrumentation PGO and sample PGO. +;; The first should not disable simplify CFG speculation, the latter two should before +;; profile annotation. +; RUN: opt -passes='thinlto-pre-link<O2>' -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --check-prefixes=PGO,INSTRPGO +; RUN: opt -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --check-prefixes=PGO,SAMPLEPGO + +;; Test the LTO pre-link pipeline with no PGO, instrumentation PGO and sample PGO. +;; The first should not disable simplify CFG speculation, the latter two should before +;; profile annotation. +; RUN: opt -passes='lto-pre-link<O2>' -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -passes='lto-pre-link<O2>' -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --check-prefixes=PGO,INSTRPGO +; RUN: opt -passes='lto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --check-prefixes=PGO,SAMPLEPGO + +;; The ThinLTO post-link pipeline will already have performed profile annotation, +;; and should never disable simplifyCFG speculation.
+; RUN: opt -passes='thinlto<O2>' -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -passes='thinlto<O2>' -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -passes='thinlto<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO + +;; The LTO post-link pipeline will already have performed profile annotation, +;; and should never disable simplifyCFG speculation. +; RUN: opt -passes='lto<O2>' -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -passes='lto<O2>' -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -passes='lto<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO + +;; Ensure the -always-allow-simplify-cfg-speculation option enables speculation +;; as expected. +; RUN: opt -always-allow-simplify-cfg-speculation -passes='default<O2>' -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO +; RUN: opt -always-allow-simplify-cfg-speculation -passes='default<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%t.profdata -print-pipeline-passes -S < %s | FileCheck %s --implicit-check-not=no-speculate-blocks --check-prefix=NOPGO + +;; With no PGO, we can always speculate (the corresponding FileCheck invocations +;; have an --implicit-check-not=no-speculate-blocks).
+; NOPGO: simplifycfg +; NOPGO-SAME: ;speculate-blocks + +;; With either type of PGO, we should disable simplifyCFG speculation before profile +;; annotation and allow it afterwards. +; PGO: simplifycfg +; PGO-NOT: ;speculate-blocks +; PGO-SAME: no-speculate-blocks +; PGO-NOT: ;speculate-blocks +; INSTRPGO-SAME: pgo-instr-use +; SAMPLEPGO-SAME: sample-profile +; PGO-NOT: no-speculate-blocks +; PGO-SAME: ;speculate-blocks +; PGO-NOT: no-speculate-blocks