diff --git a/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/llvm/include/llvm/CodeGen/MachineSizeOpts.h
--- a/llvm/include/llvm/CodeGen/MachineSizeOpts.h
+++ b/llvm/include/llvm/CodeGen/MachineSizeOpts.h
@@ -23,14 +23,17 @@
 class MachineFunction;
 
 /// Returns true if machine function \p MF is suggested to be size-optimized
-/// base on the profile.
-bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI,
-                           const MachineBlockFrequencyInfo *BFI);
+/// based on the profile, false if not. Returns None if there is no info in the
+/// profile.
+Optional<bool> shouldOptimizeForSize(const MachineFunction *MF,
+                                     ProfileSummaryInfo *PSI,
+                                     const MachineBlockFrequencyInfo *BFI);
 
 /// Returns true if machine basic block \p MBB is suggested to be size-optimized
-/// base on the profile.
-bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
-                           ProfileSummaryInfo *PSI,
-                           const MachineBlockFrequencyInfo *MBFI);
+/// based on the profile, false if not. Returns None if there is no info in the
+/// profile.
+Optional<bool> shouldOptimizeForSize(const MachineBasicBlock *MBB,
+                                     ProfileSummaryInfo *PSI,
+                                     const MachineBlockFrequencyInfo *MBFI);
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
--- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h
+++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
@@ -33,16 +33,17 @@
 class Function;
 class ProfileSummaryInfo;
 
-template<typename AdapterT, typename FuncT, typename BFIT>
-bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
-                                   BFIT *BFI) {
+template <typename AdapterT, typename FuncT, typename BFIT>
+Optional<bool> shouldFuncOptimizeForSizeImpl(const FuncT *F,
+                                             ProfileSummaryInfo *PSI,
+                                             BFIT *BFI) {
   assert(F);
   if (!PSI || !BFI || !PSI->hasProfileSummary())
-    return false;
+    return None;
   if (ForcePGSO)
     return true;
   if (!EnablePGSO)
-    return false;
+    return None;
   if (PGSOColdCodeOnly ||
       (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
     // Even if the working set size isn't large, size-optimize cold code.
@@ -53,16 +54,16 @@
       F, PSI, *BFI);
 }
 
-template<typename AdapterT, typename BlockT, typename BFIT>
-bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
-                               BFIT *BFI) {
+template <typename AdapterT, typename BlockT, typename BFIT>
+Optional<bool> shouldOptimizeForSizeImpl(const BlockT *BB,
+                                         ProfileSummaryInfo *PSI, BFIT *BFI) {
   assert(BB);
   if (!PSI || !BFI || !PSI->hasProfileSummary())
-    return false;
+    return None;
   if (ForcePGSO)
     return true;
   if (!EnablePGSO)
-    return false;
+    return None;
   if (PGSOColdCodeOnly ||
       (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
     // Even if the working set size isn't large, size-optimize cold code.
@@ -74,15 +75,15 @@
 }
 
 /// Returns true if function \p F is suggested to be size-optimized base on the
-/// profile.
-bool shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
-                           BlockFrequencyInfo *BFI);
+/// profile, false if not. Returns None if there is no info in the profile.
+Optional<bool> shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
+                                     BlockFrequencyInfo *BFI);
 
 /// Returns true if basic block \p BB is suggested to be size-optimized base
-/// on the profile.
-bool shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
-                           BlockFrequencyInfo *BFI);
-
+/// on the profile, false if not. Returns None if there is no info in the profile.
+Optional<bool> shouldOptimizeForSize(const BasicBlock *BB,
+                                     ProfileSummaryInfo *PSI,
+                                     BlockFrequencyInfo *BFI);
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -105,16 +105,16 @@
 };
 } // end anonymous namespace
 
-bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
-                                 ProfileSummaryInfo *PSI,
-                                 const MachineBlockFrequencyInfo *MBFI) {
-  return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
-      MF, PSI, MBFI);
+Optional<bool>
+llvm::shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI,
+                            const MachineBlockFrequencyInfo *MBFI) {
+  return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(MF, PSI,
+                                                                    MBFI);
 }
 
-bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
-                                 ProfileSummaryInfo *PSI,
-                                 const MachineBlockFrequencyInfo *MBFI) {
-  return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
-      MBB, PSI, MBFI);
+Optional<bool>
+llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+                            ProfileSummaryInfo *PSI,
+                            const MachineBlockFrequencyInfo *MBFI) {
+  return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(MBB, PSI, MBFI);
 }
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1442,7 +1442,7 @@
   // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
   const bool OptForSize =
       SI->getParent()->getParent()->hasOptSize() ||
-      llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
+      llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI).getValueOr(false);
   const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
   const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
 
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -553,8 +553,9 @@
   unsigned NumUses = 0;
 
-  bool OptForSize = Entry->getParent()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
-  if (!OptForSize || std::distance(S,E) > 100) {
+  bool OptForSize = Entry->getParent()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI)
+                        .getValueOr(false);
+  if (!OptForSize || std::distance(S, E) > 100) {
     for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
       NumUses += ConstCand->Uses.size();
       if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -544,8 +544,9 @@
     auto *HeaderBB = L->getHeader();
     auto *F = HeaderBB->getParent();
 
-    bool OptForSize = F->hasOptSize() ||
-                      llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+    bool OptForSize =
+        F->hasOptSize() ||
+        llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI).getValueOr(false);
     if (OptForSize) {
       LLVM_DEBUG(
           dbgs() << "Versioning is needed but not allowed when optimizing "
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -212,8 +212,9 @@
   TTI.getUnrollingPreferences(L, SE, UP);
 
   // Apply size attributes
-  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+  bool OptForSize =
+      L->getHeader()->getParent()->hasOptSize() ||
+      llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI).getValueOr(false);
   if (OptForSize) {
     UP.Threshold = UP.OptSizeThreshold;
     UP.PartialThreshold = UP.PartialOptSizeThreshold;
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2715,8 +2715,9 @@
 
   // Don't rewrite fputs to fwrite when optimising for size because fwrite
   // requires more arguments and thus extra MOVs are required.
-  bool OptForSize = CI->getFunction()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+  bool OptForSize =
+      CI->getFunction()->hasOptSize() ||
+      llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI).getValueOr(false);
   if (OptForSize)
     return nullptr;
 
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp
--- a/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -69,12 +69,14 @@
 };
 } // end anonymous namespace
 
-bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
-                                 BlockFrequencyInfo *BFI) {
+Optional<bool> llvm::shouldOptimizeForSize(const Function *F,
+                                           ProfileSummaryInfo *PSI,
+                                           BlockFrequencyInfo *BFI) {
   return shouldFuncOptimizeForSizeImpl<BasicBlockBFIAdapter>(F, PSI, BFI);
 }
 
-bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
-                                 BlockFrequencyInfo *BFI) {
+Optional<bool> llvm::shouldOptimizeForSize(const BasicBlock *BB,
+                                           ProfileSummaryInfo *PSI,
+                                           BlockFrequencyInfo *BFI) {
   return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI);
 }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7429,9 +7429,10 @@
                               ScalarEvolution *SE, DominatorTree *DT,
                               const LoopAccessInfo *LAI) {
   ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed;
+  Optional<bool> IsColdByProfile =
+      llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
   if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
-      (F->hasOptSize() ||
-       llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
+      (F->hasOptSize() || IsColdByProfile.getValueOr(false)))
     SEL = CM_ScalarEpilogueNotAllowedOptSize;
   else if (PreferPredicateOverEpilog ||
            Hints.getPredicate() == LoopVectorizeHints::FK_Enabled ||
diff --git a/llvm/test/Transforms/LoopVectorize/hot_short_tc_loop.ll b/llvm/test/Transforms/LoopVectorize/hot_short_tc_loop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/hot_short_tc_loop.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="print<block-freq>,loop-vectorize" -S < %s 2>&1 | FileCheck %s
+
+; Check vectorization of hot short trip count with epilog. In this case inner
+; loop trip count is not constant and its value is estimated by profile.
+
+; ModuleID = 'test.cpp'
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = dso_local global [5 x i32] zeroinitializer, align 16
+@b = dso_local global [5 x i32] zeroinitializer, align 16
+
+; Function Attrs: uwtable
+define dso_local void @_Z3fooi(i32 %M) local_unnamed_addr #0 !prof !11 {
+; CHECK: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15:%.*]]
+; CHECK: [[TMP18:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD]], [[VEC_IND6:%.*]]
+; CHECK: [[WIDE_LOAD10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP23:%.*]]
+; CHECK: [[TMP26:%.*]] = add nsw <4 x i32> [[WIDE_LOAD10]], [[TMP18]]
+; CHECK: store <4 x i32> [[TMP26]], <4 x i32>* [[TMP28:%.*]]
+;
+entry:
+  %a = alloca [5 x i32], align 16
+  %b = alloca [5 x i32], align 16
+  %0 = bitcast [5 x i32]* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %0) #3
+  %1 = bitcast [5 x i32]* %b to i8*
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %1) #3
+  %arraydecay = getelementptr inbounds [5 x i32], [5 x i32]* %a, i64 0, i64 0
+  br label %for.body.us.preheader
+
+for.body.us.preheader:                            ; preds = %entry
+  %wide.trip.count = zext i32 %M to i64
+  br label %for.body.us
+
+for.body.us:                                      ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.body.us.preheader
+  %j.019.us = phi i32 [ %inc8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.body.us.preheader ]
+  call void @_Z3barPi(i32* nonnull %arraydecay)
+  br label %for.body4.us
+
+for.body4.us:                                     ; preds = %for.body4.us, %for.body.us
+  %indvars.iv = phi i64 [ 0, %for.body.us ], [ %indvars.iv.next, %for.body4.us ]
+  %arrayidx.us = getelementptr inbounds [5 x i32], [5 x i32]* %b, i64 0, i64 %indvars.iv
+  %2 = load i32, i32* %arrayidx.us, align 4, !tbaa !2
+  %3 = trunc i64 %indvars.iv to i32
+  %mul.us = mul nsw i32 %2, %3
+  %arrayidx6.us = getelementptr inbounds [5 x i32], [5 x i32]* %a, i64 0, i64 %indvars.iv
+  %4 = load i32, i32* %arrayidx6.us, align 4, !tbaa !2
+  %add.us = add nsw i32 %4, %mul.us
+  store i32 %add.us, i32* %arrayidx6.us, align 4, !tbaa !2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us, !prof !10
+
+for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
+  %inc8.us = add nuw nsw i32 %j.019.us, 1
+  %exitcond21 = icmp eq i32 %inc8.us, 20
+  br i1 %exitcond21, label %for.cond.cleanup.loopexit, label %for.body.us, !prof !12
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us
+  br label %for.cond.cleanup
+
+for.cond.cleanup.loopexit24:                      ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit24, %for.cond.cleanup.loopexit
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %0) #3
+  ret void
+}
+
+; Check vectorization of hot short trip count with epilog. In this case inner
+; loop trip count is known constant value.
+
+; Function Attrs: uwtable
+define dso_local void @_Z3fooi2() local_unnamed_addr #0 !prof !11 {
+; CHECK: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15:%.*]]
+; CHECK: [[TMP18:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD]], [[VEC_IND6:%.*]]
+; CHECK: [[WIDE_LOAD10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP23:%.*]]
+; CHECK: [[TMP26:%.*]] = add nsw <4 x i32> [[WIDE_LOAD10]], [[TMP18]]
+; CHECK: store <4 x i32> [[TMP26]], <4 x i32>* [[TMP28:%.*]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
+  ret void
+
+for.body:                                         ; preds = %entry, %for.cond.cleanup3
+  %j.018 = phi i32 [ 0, %entry ], [ %inc8, %for.cond.cleanup3 ]
+  tail call void @_Z3barPi(i32* getelementptr inbounds ([5 x i32], [5 x i32]* @a, i64 0, i64 0))
+  br label %for.body4
+
+for.cond.cleanup3:                                ; preds = %for.body4
+  %inc8 = add nuw nsw i32 %j.018, 1
+  %cmp = icmp ult i32 %inc8, 1000
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !prof !13
+
+for.body4:                                        ; preds = %for.body, %for.body4
+  %i.017 = phi i32 [ 0, %for.body ], [ %inc, %for.body4 ]
+  %idxprom = zext i32 %i.017 to i64
+  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %mul = mul nsw i32 %0, %i.017
+  %arrayidx6 = getelementptr inbounds [5 x i32], [5 x i32]* @a, i64 0, i64 %idxprom
+  %1 = load i32, i32* %arrayidx6, align 4, !tbaa !2
+  %add = add nsw i32 %1, %mul
+  store i32 %add, i32* %arrayidx6, align 4, !tbaa !2
+  %inc = add nuw nsw i32 %i.017, 1
+  %cmp2 = icmp ult i32 %inc, 5
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
+}
+
+; This is a negative test. Check that vectorization is not performed for COLD
+; short trip count loop requiring epilog. Note that outer loop has only 20
+; iterations and there is no associated profile info.
+
+
+; Function Attrs: uwtable
+define dso_local void @_Z3fooi3(i32 %M) local_unnamed_addr #0 !prof !11 {
+; CHECK: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_US:%.*]]
+; CHECK: [[MUL_US:%.*]] = mul nsw i32 [[TMP2]], [[TMP3:%.*]]
+; CHECK: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX6_US:%.*]]
+; CHECK: [[ADD_US:%.*]] = add nsw i32 [[TMP4]], [[MUL_US]]
+; CHECK: store i32 [[ADD_US]], i32* [[ARRAYIDX6_US]]
+;
+entry:
+  %a = alloca [5 x i32], align 16
+  %b = alloca [5 x i32], align 16
+  %0 = bitcast [5 x i32]* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %0) #3
+  %1 = bitcast [5 x i32]* %b to i8*
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %1) #3
+  %arraydecay = getelementptr inbounds [5 x i32], [5 x i32]* %a, i64 0, i64 0
+  br label %for.body.us.preheader
+
+for.body.us.preheader:                            ; preds = %entry
+  %wide.trip.count = zext i32 %M to i64
+  br label %for.body.us
+
+for.body.us:                                      ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.body.us.preheader
+  %j.019.us = phi i32 [ %inc8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.body.us.preheader ]
+  call void @_Z3barPi(i32* nonnull %arraydecay)
+  br label %for.body4.us
+
+for.body4.us:                                     ; preds = %for.body4.us, %for.body.us
+  %indvars.iv = phi i64 [ 0, %for.body.us ], [ %indvars.iv.next, %for.body4.us ]
+  %arrayidx.us = getelementptr inbounds [5 x i32], [5 x i32]* %b, i64 0, i64 %indvars.iv
+  %2 = load i32, i32* %arrayidx.us, align 4, !tbaa !2
+  %3 = trunc i64 %indvars.iv to i32
+  %mul.us = mul nsw i32 %2, %3
+  %arrayidx6.us = getelementptr inbounds [5 x i32], [5 x i32]* %a, i64 0, i64 %indvars.iv
+  %4 = load i32, i32* %arrayidx6.us, align 4, !tbaa !2
+  %add.us = add nsw i32 %4, %mul.us
+  store i32 %add.us, i32* %arrayidx6.us, align 4, !tbaa !2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us, !prof !14
+
+for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
+  %inc8.us = add nuw nsw i32 %j.019.us, 1
+  %exitcond21 = icmp eq i32 %inc8.us, 20
+  br i1 %exitcond21, label %for.cond.cleanup.loopexit, label %for.body.us
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us
+  br label %for.cond.cleanup
+
+for.cond.cleanup.loopexit24:                      ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit24, %for.cond.cleanup.loopexit
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local void @_Z3barPi(i32*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+attributes #0 = { "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind willreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project f379dd57b978c4e1483d721f422c79e3c0c5ccdc)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = distinct !{!6, !7}
+!7 = !{!"llvm.loop.isvectorized", i32 1}
+!8 = distinct !{!8, !9, !7}
+!9 = !{!"llvm.loop.unroll.runtime.disable"}
+!10 = !{!"branch_weights", i32 999, i32 4995}
+!11 = !{!"function_entry_count", i64 1}
+!12 = !{!"branch_weights", i32 1, i32 999}
+!13 = !{!"branch_weights", i32 1000, i32 1}
+!14 = !{!"branch_weights", i32 9, i32 45}
diff --git a/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp b/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp
--- a/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp
+++ b/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp
@@ -113,13 +113,13 @@
   ASSERT_TRUE(iter == BB0.succ_end());
   MachineBasicBlock *BB3 = *BB1->succ_begin();
   ASSERT_TRUE(BB3 == *BB2->succ_begin());
-  EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, MBFI_F));
-  EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, MBFI_G));
-  EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, MBFI_H));
-  EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, MBFI_F));
-  EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, MBFI_F));
-  EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, MBFI_F));
-  EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, MBFI_F));
+  EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, MBFI_F).getValueOr(false));
+  EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, MBFI_G).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, MBFI_H).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, MBFI_F).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, MBFI_F).getValueOr(false));
+  EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, MBFI_F).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, MBFI_F).getValueOr(false));
 }
 
 const char* MachineSizeOptsTest::MIRString = R"MIR(
diff --git a/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp b/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp
--- a/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp
@@ -68,13 +68,13 @@
   BasicBlock *BB3 = BB1->getSingleSuccessor();
 
   EXPECT_TRUE(PSI.hasProfileSummary());
-  EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, BFI_F));
-  EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, BFI_G));
-  EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, BFI_H));
-  EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, BFI_F));
-  EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, BFI_F));
-  EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, BFI_F));
-  EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, BFI_F));
+  EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, BFI_F).getValueOr(false));
+  EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, BFI_G).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, BFI_H).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, BFI_F).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, BFI_F).getValueOr(false));
+  EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, BFI_F).getValueOr(false));
+  EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, BFI_F).getValueOr(false));
 }
 
 const char* SizeOptsTest::IRString = R"IR(