Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -133,6 +133,11 @@ cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); +static cl::opt<unsigned> +MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), + cl::desc("Max size of a block which is still considered " + "small enough to thread through")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -2187,9 +2192,12 @@ unsigned Size = 0; for (Instruction &I : BB->instructionsWithoutDebug()) { - if (Size > 10) + if (Size > MaxSmallBlockSize) return false; // Don't clone large BB's. - ++Size; + // We insert PRE Phis purely for threading, so Phis should not be + // accounted in block's size. + if (!isa<PHINode>(I)) + ++Size; // We can only support instructions that do not define values that are // live outside of the current basic block. 
Index: llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll +++ llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -16,31 +16,32 @@ define void @caller1(i1 %c, i64* align 1 %ptr) { ; ASSUMPTIONS-OFF-LABEL: @caller1( -; ASSUMPTIONS-OFF-NEXT: br i1 [[C:%.*]], label [[TRUE2:%.*]], label [[FALSE2:%.*]] -; ASSUMPTIONS-OFF: true2: -; ASSUMPTIONS-OFF-NEXT: store volatile i64 0, i64* [[PTR:%.*]], align 8 +; ASSUMPTIONS-OFF-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]] +; ASSUMPTIONS-OFF: false1: +; ASSUMPTIONS-OFF-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 -; ASSUMPTIONS-OFF-NEXT: store volatile i64 2, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 3, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: ret void -; ASSUMPTIONS-OFF: false2: -; ASSUMPTIONS-OFF-NEXT: store volatile i64 1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF: true2.critedge: ; ASSUMPTIONS-OFF-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 -; ASSUMPTIONS-OFF-NEXT: 
store volatile i64 3, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 2, i64* [[PTR]], align 4 ; ASSUMPTIONS-OFF-NEXT: ret void ; ; ASSUMPTIONS-ON-LABEL: @caller1( ; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[TRUE1:%.*]], label [[FALSE1:%.*]] ; ASSUMPTIONS-ON: true1: -; ASSUMPTIONS-ON-NEXT: [[C_PR:%.*]] = phi i1 [ false, [[FALSE1]] ], [ true, [[TMP0:%.*]] ] ; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR:%.*]] to i64 ; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 ; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 @@ -51,7 +52,8 @@ ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 -; ASSUMPTIONS-ON-NEXT: br i1 [[C_PR]], label [[TRUE2:%.*]], label [[FALSE2:%.*]] +; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: br i1 [[C]], label [[TRUE2:%.*]], label [[FALSE2:%.*]] ; ASSUMPTIONS-ON: false1: ; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR]], align 4 ; ASSUMPTIONS-ON-NEXT: br label [[TRUE1]] @@ -71,6 +73,7 @@ store volatile i64 -1, i64* %ptr store volatile i64 -1, i64* %ptr store volatile i64 -1, i64* %ptr + store volatile i64 -1, i64* %ptr br i1 %c2, label %true2, label %false2 false1: Index: llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll +++ llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -simplifycfg -S < %s | FileCheck %s -; RUN: opt -passes=simplify-cfg -S < %s | FileCheck %s +; RUN: opt -simplifycfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s +; RUN: opt -passes=simplify-cfg 
-simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s target datalayout = "e-p:64:64-p5:32:32-A5" @@ -50,14 +50,71 @@ ret void } -; FIXME: SimplifyCFG is doing something weird here. It should have split the -; blocks like in the test above, but instead it creates .pr Phi node which -; only complicates things. +; Corner case: the block has max possible size for which we still do PRE. define void @test_02(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 { ; CHECK-LABEL: @test_02( +; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]] +; CHECK: false1: +; CHECK-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 3, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; CHECK: true2.critedge: +; CHECK-NEXT: [[PTRINT_C:%.*]] = ptrtoint i64* [[PTR]] to i64 +; CHECK-NEXT: [[MASKEDPTR_C:%.*]] = and i64 [[PTRINT_C]], 7 +; CHECK-NEXT: [[MASKCOND_C:%.*]] = icmp eq i64 [[MASKEDPTR_C]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND_C]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store 
volatile i64 2, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; + br i1 %c, label %true1, label %false1 + +true1: ; preds = %false1, %0 + %ptrint = ptrtoint i64* %ptr to i64 + %maskedptr = and i64 %ptrint, 7 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + store volatile i64 0, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + br i1 %c, label %true2, label %false2 + +false1: ; preds = %0 + store volatile i64 1, i64* %ptr, align 4 + br label %true1 + +true2: ; preds = %true1 + store volatile i64 2, i64* %ptr, align 8 + ret void + +false2: ; preds = %true1 + store volatile i64 3, i64* %ptr, align 8 + ret void +} + +; This block is too huge for PRE. +define void @test_03(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 { +; CHECK-LABEL: @test_03( ; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE1:%.*]], label [[FALSE1:%.*]] ; CHECK: true1: -; CHECK-NEXT: [[C_PR:%.*]] = phi i1 [ [[C]], [[FALSE1]] ], [ true, [[TMP0:%.*]] ] ; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR:%.*]] to i64 ; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 ; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 @@ -68,7 +125,8 @@ ; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 -; CHECK-NEXT: br i1 [[C_PR]], label [[TRUE2:%.*]], label [[FALSE2:%.*]] +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: br i1 [[C]], label [[TRUE2:%.*]], label [[FALSE2:%.*]] ; CHECK: false1: ; CHECK-NEXT: store volatile i64 1, i64* [[PTR]], align 4 ; CHECK-NEXT: br label [[TRUE1]] @@ -92,6 +150,7 @@ store volatile i64 -1, i64* %ptr, align 8 store volatile i64 -1, i64* %ptr, align 8 store volatile i64 -1, i64* %ptr, align 8 + store 
volatile i64 -1, i64* %ptr, align 8 br i1 %c, label %true2, label %false2 false1: ; preds = %0