diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -151,9 +151,10 @@
              "speculatively executed instructions"));
 
 static cl::opt<unsigned>
-MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
-                  cl::desc("Max size of a block which is still considered "
-                           "small enough to thread through"));
+    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
+                      cl::init(11),
+                      cl::desc("Max size of a block which is still considered "
+                               "small enough to thread through"));
 
 // Two is chosen to allow one negation and a logical combine.
 static cl::opt<unsigned>
@@ -2430,19 +2431,32 @@
 static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
   int Size = 0;
 
-  for (Instruction &I : BB->instructionsWithoutDebug()) {
-    if (Size > MaxSmallBlockSize)
-      return false; // Don't clone large BB's.
+  SmallPtrSet<const Value *, 32> EphValues;
+  auto IsEphemeral = [&](const Value *V) {
+    if (isa<AssumeInst>(V))
+      return true;
+    return isSafeToSpeculativelyExecute(V) &&
+           all_of(V->users(),
+                  [&](const User *U) { return EphValues.count(U); });
+  };
 
+  // Walk the loop in reverse so that we can identify ephemeral values properly
+  // (values only feeding assumes).
+  for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
     // Can't fold blocks that contain noduplicate or convergent calls.
     if (CallInst *CI = dyn_cast<CallInst>(&I))
       if (CI->cannotDuplicate() || CI->isConvergent())
         return false;
 
+    // Ignore ephemeral values which are deleted during codegen.
+    if (IsEphemeral(&I))
+      EphValues.insert(&I);
     // We will delete Phis while threading, so Phis should not be accounted in
-    // block's size
-    if (!isa<PHINode>(I))
-      ++Size;
+    // block's size.
+    else if (!isa<PHINode>(I)) {
+      if (++Size > MaxSmallBlockSize)
+        return false; // Don't clone large BB's.
+    }
 
     // We can only support instructions that do not define values that are
     // live outside of the current basic block.
diff --git a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
--- a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
-; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
+; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=7 -S < %s | FileCheck %s
+; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=7 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64-p5:32:32-A5"
 
 declare void @llvm.assume(i1)
+declare i1 @llvm.type.test(i8*, metadata) nounwind readnone
 
 define void @test_01(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
 ; CHECK-LABEL: @test_01(
@@ -165,3 +166,61 @@
   store volatile i64 3, i64* %ptr, align 8
   ret void
 }
+
+; Try the max block size for PRE again but with the bitcast/type test/assume
+; sequence used for whole program devirt.
+define void @test_04(i1 %c, i64* align 1 %ptr, [3 x i8*]* %vtable) local_unnamed_addr #0 { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]] +; CHECK: false1: +; CHECK-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8* +; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo") +; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 3, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; CHECK: true2.critedge: +; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8* +; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo") +; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 2, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; + br i1 %c, label %true1, label %false1 + +true1: ; preds = %false1, %0 + %vtablei8 = bitcast [3 x i8*]* %vtable to i8* + %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"foo") + tail call void @llvm.assume(i1 %p) + store volatile i64 0, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + br i1 %c, label %true2, label %false2 + +false1: ; preds = %0 + store volatile i64 1, i64* %ptr, align 4 + br label %true1 + +true2: ; preds = %true1 + store volatile i64 2, i64* %ptr, align 8 + ret void + +false2: ; preds = %true1 + store volatile i64 3, i64* %ptr, align 8 + ret void +}
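
Illustrative aside, not part of the patch: the reverse walk in BlockIsSimpleEnoughToThreadThrough skips "ephemeral" values, that is, assumes plus speculatable instructions whose only transitive users are assumes, so a bitcast/llvm.type.test/llvm.assume chain no longer counts toward the threading size limit. The standalone C++ sketch below models that counting on a toy instruction list; ToyInst and countNonEphemeral are hypothetical names invented for this example, while the real code operates on llvm::Instruction via isSafeToSpeculativelyExecute() and a SmallPtrSet.

#include <cstdio>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

// Toy stand-in for llvm::Instruction, just enough to model the counting.
struct ToyInst {
  std::string Name;
  bool IsAssume = false;     // stands in for isa<AssumeInst>
  bool Speculatable = false; // stands in for isSafeToSpeculativelyExecute()
  std::vector<const ToyInst *> Users;

  explicit ToyInst(std::string N) : Name(std::move(N)) {}
};

// Count the instructions that would actually be cloned. Walking in reverse
// classifies every user before its operands, so a chain that only feeds an
// assume (bitcast -> llvm.type.test -> llvm.assume) is skipped entirely.
static int countNonEphemeral(const std::vector<const ToyInst *> &Block) {
  std::unordered_set<const ToyInst *> EphValues;
  int Size = 0;
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    const ToyInst *I = *It;
    bool OnlyEphemeralUsers = true;
    for (const ToyInst *U : I->Users)
      OnlyEphemeralUsers &= EphValues.count(U) != 0;
    if (I->IsAssume || (I->Speculatable && OnlyEphemeralUsers))
      EphValues.insert(I); // ignored, like the patch's EphValues set
    else
      ++Size;              // would count toward MaxSmallBlockSize
  }
  return Size;
}

int main() {
  // Mirror the test_04 pattern: a bitcast/type.test/assume chain plus two
  // ordinary stores.
  ToyInst Cast{"bitcast"}, Test{"type.test"}, Assume{"assume"};
  ToyInst Store1{"store1"}, Store2{"store2"};
  Assume.IsAssume = true;
  Cast.Speculatable = Test.Speculatable = true;
  Cast.Users = {&Test};   // the bitcast only feeds the type test
  Test.Users = {&Assume}; // the type test only feeds the assume
  const std::vector<const ToyInst *> BB = {&Cast, &Test, &Assume, &Store1,
                                           &Store2};
  std::printf("counted size = %d\n", countNonEphemeral(BB)); // prints 2
  return 0;
}

Run as-is, this prints "counted size = 2" for the five-instruction toy block: the bitcast/type.test/assume chain is classified as ephemeral, which mirrors why test_04 above can still be threaded at the reduced -simplifycfg-max-small-block-size=7.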