Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -101,6 +101,10 @@ "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)")); +static cl::opt<bool> CheckDomBlockInstructionsCount( + "simplifycfg-db-instr-count", cl::Hidden, cl::init(false), + cl::desc("Do not merge BBs if domblock already has more than phi-node-folding-threshold instructions")); + static cl::opt<bool> DupRet( "simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); @@ -2413,6 +2417,24 @@ << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); + // We need to be sure that DomBlock has + // enough room for the new instructions. + // First add the cost of the select instructions that will be added to this + // block (this cost is equal to the number of PHI nodes in BB). + unsigned Cost = NumPhis; + + if (CheckDomBlockInstructionsCount) { + for (const auto& Instr : *DomBlock) { + if (!isa(&Instr)) + Cost += TTI.getUserCost(&Instr); + } + + if (Cost > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) { + // DomBlock is already too large. + return false; + } + } + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. 
Instruction *InsertPt = DomBlock->getTerminator(); Index: llvm/test/Transforms/SimplifyCFG/AArch64/check-instr-cost-for-folding.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/AArch64/check-instr-cost-for-folding.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -simplifycfg-db-instr-count=false -mtriple=aarch64-linux-gnu -simplifycfg -S | FileCheck %s --check-prefix=CHECK-MERGE +; RUN: opt < %s -simplifycfg-db-instr-count=true -mtriple=aarch64-linux-gnu -simplifycfg -S | FileCheck %s --check-prefix=CHECK-SEPARATE + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +%struct.ptr_wrapper = type { i64*, i8* } + +; When merge is enabled, 'if.then' and 'if.end' should be merged into 'for.body' +; CHECK-MERGE-LABEL: for.body: +; CHECK-MERGE-NOT: if.then: +; CHECK-MERGE-NOT: if.end: +; And two consecutive 'select' instructions are generated +; CHECK-MERGE: select i1 %tobool +; CHECK-MERGE-NEXT: select i1 %tobool + +; When merge is disabled, no 'select' instruction should be generated +; CHECK-SEPARATE-LABEL: for.body: +; CHECK-SEPARATE-NOT: select +; CHECK-SEPARATE-LABEL: if.then: +; CHECK-SEPARATE-LABEL: if.end: + +; Function Attrs: nofree noinline norecurse nounwind +define dso_local i32 @test_func(%struct.ptr_wrapper* nocapture %wrapper, i32 %def_mask, i32 %bit_to_compare, i32 %bit, i32 %mask) local_unnamed_addr #0 { +entry: + %proc = getelementptr inbounds %struct.ptr_wrapper, %struct.ptr_wrapper* %wrapper, i64 0, i32 1 + %and3 = and i32 %mask, %def_mask + %tobool4 = icmp eq i32 %and3, 0 + br label %for.body + +for.cond.cleanup: ; preds = %if.end7 + %retval1.1.lcssa = phi i32 [ %retval1.1, %if.end7 ] + %res_in.1.lcssa = phi i32 [ %res_in.1, %if.end7 ] + %conv = zext i32 %res_in.1.lcssa to i64 + %res_in8 = getelementptr inbounds %struct.ptr_wrapper, %struct.ptr_wrapper* %wrapper, i64 0, i32 0 + %0 = load i64*, i64** %res_in8, align 8, !tbaa !2 + store i64 
%conv, i64* %0, align 8, !tbaa !7 + ret i32 %retval1.1.lcssa + +for.body: ; preds = %if.end7, %entry + %j.022 = phi i32 [ 0, %entry ], [ %inc, %if.end7 ] + %res_in.021 = phi i32 [ 0, %entry ], [ %res_in.1, %if.end7 ] + %retval1.020 = phi i32 [ 0, %entry ], [ %retval1.1, %if.end7 ] + %bit.addr.019 = phi i32 [ %bit, %entry ], [ %shl, %if.end7 ] + %inc = add nuw nsw i32 %j.022, 1 + %shl = shl i32 %bit.addr.019, 1 + %and = and i32 %shl, %bit_to_compare + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %if.then +if.then: ; preds = %for.body + %or = or i32 %res_in.021, %shl + %inc2 = add nsw i32 %retval1.020, 1 + store i8* null, i8** %proc, align 8, !tbaa !9 + br label %if.end + +if.end: ; preds = %for.body, %if.then + %retval1.1 = phi i32 [ %inc2, %if.then ], [ %retval1.020, %for.body ] + %res_in.1 = phi i32 [ %or, %if.then ], [ %res_in.021, %for.body ] + br i1 %tobool4, label %if.end7, label %if.then5 + +if.then5: ; preds = %if.end + store i8* null, i8** %proc, align 8, !tbaa !9 + br label %if.end7 + +if.end7: ; preds = %if.end, %if.then5 + %exitcond = icmp eq i32 %inc, 64 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +attributes #0 = { nofree noinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (http://gitlab-msc-rd.rnd.huawei.com/rus-os-team/compilers/llvm/llvm-project.git 65c11cb4cb258372187bc5a959312b62ac705f94)"} +!2 = !{!3, !4, i64 0} +!3 = !{!"", !4, i64 0, !4, i64 8} +!4 = !{!"any pointer", !5, i64 0} +!5 = 
!{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"long", !5, i64 0} +!9 = !{!3, !4, i64 8}