Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -101,6 +101,11 @@
              "to speculatively execute to fold a 2-entry PHI node into a "
              "select (default = 4)"));
 
+static cl::opt<bool> CheckDomBlockInstructionsCount(
+    "simplifycfg-db-instr-count", cl::Hidden, cl::init(false),
+    cl::desc("Do not merge BBs if domblock already has more than "
+             "phi-node-folding-threshold instructions"));
+
 static cl::opt<bool> DupRet(
     "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
     cl::desc("Duplicate return instructions into unconditional branches"));
@@ -2413,6 +2418,22 @@
                     << " T: " << IfTrue->getName()
                     << " F: " << IfFalse->getName() << "\n");
 
+  // Folding adds one select per PHI node to DomBlock. When requested, reject
+  // the fold if DomBlock would end up costing more than the folding threshold.
+  if (CheckDomBlockInstructionsCount) {
+    // Start from the selects that the fold is going to insert.
+    unsigned Cost = NumPhis * TargetTransformInfo::TCC_Basic;
+    // NOTE(review): the isa<> template argument was lost in this copy of the
+    // patch; DbgInfoIntrinsic is assumed here -- confirm before landing.
+    for (const Instruction &Instr : *DomBlock)
+      if (!isa<DbgInfoIntrinsic>(&Instr))
+        Cost += TTI.getUserCost(&Instr);
+
+    // DomBlock is already too large.
+    if (Cost > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
+      return false;
+  }
+
   // If we can still promote the PHI nodes after this gauntlet of tests,
   // do all of the PHI's now.
   Instruction *InsertPt = DomBlock->getTerminator();
Index: llvm/test/Transforms/SimplifyCFG/AArch64/check-instr-cost-for-folding.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SimplifyCFG/AArch64/check-instr-cost-for-folding.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -simplifycfg-db-instr-count=true -mtriple=aarch64-linux-gnu -simplifycfg -S | FileCheck %s
+;
+; With -simplifycfg-db-instr-count enabled, no select may appear from
+; for.body.lr.ph onwards, so the PHI nodes in if.end are expected to stay unfolded.
+; ModuleID = 'test_func.c'
+
+%struct.ptr_wrapper = type { i32*, i8* }
+
+@g_wrapper = external dso_local local_unnamed_addr global %struct.ptr_wrapper*, align 8
+
+define dso_local i32 @test_func(i64 %in, i64 %bit, i64 %mask) local_unnamed_addr {
+entry:
+  %cmp16 = icmp eq i64 %bit, 0
+  br i1 %cmp16, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+; CHECK-LABEL: for.body.lr.ph:
+; CHECK-NOT: select
+  %0 = load %struct.ptr_wrapper*, %struct.ptr_wrapper** @g_wrapper, align 8
+  %proc = getelementptr inbounds %struct.ptr_wrapper, %struct.ptr_wrapper* %0, i64 0, i32 1
+  %and2 = and i64 %mask, 1
+  %tobool3 = icmp eq i64 %and2, 0
+  %and = and i64 %bit, %in
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body.lr.ph
+  %or = or i64 0, %bit
+  %inc = add nsw i32 0, 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body.lr.ph
+  %retval1.1 = phi i32 [ %inc, %if.then ], [ 0, %for.body.lr.ph ]
+  %res_in.1 = phi i64 [ %or, %if.then ], [ 0, %for.body.lr.ph ]
+  %1 = xor i1 %tobool, true
+  %2 = xor i1 %tobool3, true
+  %3 = or i1 %1, %2
+  store i8* null, i8** %proc, align 8
+  %shl = shl i64 %bit, 1
+  %cmp = icmp eq i64 %shl, 0
+  br label %for.end
+
+for.end:                                          ; preds = %if.end, %entry
+  %retval1.0.lcssa = phi i32 [ 0, %entry ], [ %retval1.1, %if.end ]
+  %res_in.0.lcssa = phi i64 [ 0, %entry ], [ %res_in.1, %if.end ]
+  %4 = trunc i64 %res_in.0.lcssa to i32
+  %conv7 = add i32 %retval1.0.lcssa, %4
+  ret i32 %conv7
+}