Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -145,7 +145,7 @@ cl::Hidden); // Heuristic for triangle chains. -static cl::opt TriangleChainCount( +static cl::opt TriangleChainCountOpt( "triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), @@ -340,6 +340,12 @@ /// must be done inline. TailDuplicator TailDup; + /// \brief Number of triangle-shaped-CFG's that need to be in a row for the + /// triangle tail duplication heuristic to kick in. Defaults to the value in + /// TriangleChainCountOpt, unless -O3 is specified, and then it is reduced by + /// 1. + unsigned TriangleChainCount; + /// \brief Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of @@ -2640,12 +2646,21 @@ precomputeTriangleChains(); } + TargetPassConfig *PassConfig = &getAnalysis(); + // For aggressive optimization, we can adjust some thresholds to be less + // conservative. + TriangleChainCount = TriangleChainCountOpt; + if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) { + // Apply the triangle heuristic to 1 fewer triangle. + if (TriangleChainCountOpt.getNumOccurrences() == 0) + TriangleChainCount -= 1; + } + assert(BlockToChain.empty()); buildCFGChains(); // Changing the layout can create new tail merging opportunities. - TargetPassConfig *PassConfig = &getAnalysis(); // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structured CFG. 
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && Index: test/CodeGen/PowerPC/tail-dup-layout.ll =================================================================== --- test/CodeGen/PowerPC/tail-dup-layout.ll +++ test/CodeGen/PowerPC/tail-dup-layout.ll @@ -1,4 +1,5 @@ -; RUN: llc -O2 < %s | FileCheck %s +; RUN: llc -O2 < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-O2 %s +; RUN: llc -O3 < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-O3 %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-grtev4-linux-gnu" @@ -95,17 +96,24 @@ } ; Intended layout: -; The chain-of-triangles based duplicating produces the layout +; At -O3, the chain-of-triangles based duplicating produces the layout ; test1 ; test2 ; test3 -; test4 ; optional1 ; optional2 ; optional3 -; optional4 ; exit ; even for 50/50 branches. +; At -O2, the chain-of-triangles heuristic should not apply, producing the +; layout: +; test1 +; optional1 +; test2 +; optional2 +; test3 +; optional3 +; exit ; Tail duplication puts test n+1 at the end of optional n ; so optional1 includes a copy of test2 at the end, and branches ; to test3 (at the top) or falls through to optional 2. @@ -116,29 +124,38 @@ ; test1 may have been merged with entry ;CHECK: mr [[TAGREG:[0-9]+]], 3 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: # %test2 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit -;CHECK: blr -;CHECK-NEXT: .[[OPT1LABEL]]: -;CHECK: rlwinm. 
{{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: beq 0, .[[TEST3LABEL]] -;CHECK-NEXT: .[[OPT2LABEL]]: -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: beq 0, .[[TEST4LABEL]] -;CHECK-NEXT: .[[OPT3LABEL]]: -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: beq 0, .[[EXITLABEL]] -;CHECK-NEXT: .[[OPT4LABEL]]: -;CHECK: b .[[EXITLABEL]] +;CHECK-O3-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] +;CHECK-O3-NEXT: # %test2 +;CHECK-O3-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] +;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 +;CHECK-O3-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] +;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit +;CHECK-O3: blr +;CHECK-O3-NEXT: .[[OPT1LABEL]]: +;CHECK-O3: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]] +;CHECK-O3-NEXT: .[[OPT2LABEL]]: +;CHECK-O3: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]] +;CHECK-O3-NEXT: .[[OPT3LABEL]]: +;CHECK-O3: b .[[EXITLABEL]] + +;CHECK-O2-NEXT: bc 4, 1, .[[TEST2LABEL:[_0-9A-Za-z]+]] +;CHECK-O2: bl a +;CHECK-O2-NOT: rlwinm +;CHECK-O2: .[[TEST2LABEL]]: # %test2 +;CHECK-O2-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]] +;CHECK-O2: bl b +;CHECK-O2-NOT: rlwinm +;CHECK-O2: .[[TEST3LABEL]]: # %test3 +;CHECK-O2-NEXT: rlwinm. 
{{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]] +;CHECK-O2: bl c +;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit +;CHECK-O2: blr define void @straight_test_50(i32 %tag) { entry: @@ -160,16 +177,9 @@ test3: %tagbit3 = and i32 %tag, 4 %tagbit3eq0 = icmp eq i32 %tagbit3, 0 - br i1 %tagbit3eq0, label %test4, label %optional3, !prof !2 + br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1 optional3: call void @c() - br label %test4 -test4: - %tagbit4 = and i32 %tag, 8 - %tagbit4eq0 = icmp eq i32 %tagbit4, 0 - br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1 -optional4: - call void @d() br label %exit exit: ret void