This is an archive of the discontinued LLVM Phabricator instance.

Differential D22551

CodeGen: If Convert blocks that would form a diamond when tail-merged.
ClosedPublic

Authored by iteratee on Jul 19 2016, 4:28 PM.

Download Raw Diff

Details

Reviewers

davidxl

Summary

Some if conversion currently requires tail-merging to have run first.

As an example the following function currently relies on tail-merging for if
conversion to succeed. The common tail of cond_true and cond_false is
extracted, and this then forms a diamond pattern that can be
successfully if converted.

If this block does not get extracted, either because tail-merging is
disabled or the threshold is higher, we should still recognize this
pattern and if-convert it.

define i32 @t2(i32 %a, i32 %b) nounwind {
entry:
      %tmp1434 = icmp eq i32 %a, %b           ; <i1> [#uses=1]
      br i1 %tmp1434, label %bb17, label %bb.outer

bb.outer:               ; preds = %cond_false, %entry
      %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]
      %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]
      br label %bb

bb:             ; preds = %cond_true, %bb.outer
      %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]
      %tmp. = sub i32 0, %b_addr.021.0.ph
      %tmp.40 = mul i32 %indvar, %tmp.
      %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph
      %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph
      br i1 %tmp3, label %cond_true, label %cond_false

cond_true:              ; preds = %bb
      %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph
      %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph
      %indvar.next = add i32 %indvar, 1
      br i1 %tmp1437, label %bb17, label %bb

cond_false:             ; preds = %bb
      %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0
      %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10
      br i1 %tmp14, label %bb17, label %bb.outer

bb17:           ; preds = %cond_false, %cond_true, %entry
      %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]
      ret i32 %a_addr.026.1
}

Without tail-merging or diamond-tail if conversion:

LBB1_1:                                 @ %bb
                                      @ =>This Inner Loop Header: Depth=1
      cmp     r0, r1
      ble     LBB1_3
@ BB#2:                                 @ %cond_true
                                      @   in Loop: Header=BB1_1 Depth=1
      subs    r0, r0, r1
      cmp     r1, r0
      it      ne
      cmpne   r0, r1
      bgt     LBB1_4
LBB1_3:                                 @ %cond_false
                                      @   in Loop: Header=BB1_1 Depth=1
      subs    r1, r1, r0
      cmp     r1, r0
      bne     LBB1_1
LBB1_4:                                 @ %bb17
      bx      lr

With diamond-tail if conversion, but without tail-merging:

@ BB#0:                                 @ %entry
      cmp     r0, r1
      it      eq
      bxeq    lr
LBB1_1:                                 @ %bb
                                      @ =>This Inner Loop Header: Depth=1
      cmp     r0, r1
      ite     le
      suble   r1, r1, r0
      subgt   r0, r0, r1
      cmp     r1, r0
      bne     LBB1_1
@ BB#2:                                 @ %bb17
      bx      lr

Diff Detail

Event Timeline

iteratee updated this revision to Diff 64599.Jul 19 2016, 4:28 PM

iteratee retitled this revision from to CodeGen: If Convert blocks that would form a diamond when tail-merged..

iteratee updated this object.

iteratee added a reviewer: davidxl.

iteratee set the repository for this revision to rL LLVM.

iteratee updated this object.

iteratee added subscribers: echristo, chandlerc, timshen, llvm-commits.

Removed debugging, tidied spacing, added comments.

I am torn about this change. While this looks like a useful thing to do, I suspect this is not the right way to approach the problem.

The example actually confirms that pre-layout tail merging is a good normalization/enabler pass for later optimizations. This is why it should be run with a lower threshold, enabling as much optimization as possible, and TailDup can later undo the merges that bring no benefit and improve the layout.

Another question is whether this patch can handle cases that (tailMerge + ifcvt) cannot handle. If not, it seems to me the patch duplicates logic (e.g., counting dups) that already exists in tailMerge, which is not the right approach.

Is this patch required to enable your tailDup enhancement patch? I don't think this one is essential for it. We can probably focus on getting your tailDup patch in first (it is very close to getting in -- probably you just need to make your latest tailMerge tuning enabled only in post-layout mode?)

In D22551#490028, @davidxl wrote:

I am torn about this change. While this looks like a useful thing to do, I suspect this is not the right way to approach the problem.

The example actually confirms that pre-layout tail merging is a good normalization/enabler pass for later optimizations. This is why it should be run with a lower threshold, enabling as much optimization as possible, and TailDup can later undo the merges that bring no benefit and improve the layout.

I couldn't find any of the original commits about tail merging that refer to normalization. It's always about reducing code size. Also, it's unusual for a canonicalization pass to have a threshold.

Another question is whether this patch can handle cases that (tailMerge + ifcvt) cannot handle. If not, it seems to me the patch duplicates logic (e.g., counting dups) that already exists in tailMerge, which is not the right approach.

Two things:

The code has to count duplicates anyway. Look at how IfConvertDiamond is written.
There is a precedent for teaching optimization passes to look deeper, even if there is a canonicalization pass that would prevent the need.

Is this patch required to enable your tailDup enhancement patch? I don't think this one is essential for it. We can probably focus on getting your tailDup patch in first (it is very close to getting in -- probably you just need to make your latest tailMerge tuning enabled only in post-layout mode?)

Not specifically. If tailMerge is made less aggressive only during layout, then this is not necessary.

Refactor shared code between Diamond and Diamond with shared tail.

The validateDiamond refactoring change can be split out from the functional change into a different patch.

davidxl added inline comments.Jul 21 2016, 3:58 PM

lib/CodeGen/IfConversion.cpp
845	Document the difference between this pattern and ValidDiamond? Also, since there is no common tail shared between truebb and falsebb, the shape is not really a 'Diamond'. Perhaps name it 'ValidDiamondWithTailCommonned' to indicate that the shape will be a diamond if the tail is commoned?
863	Is the 'fallthrough' check needed here?
873	Can this check be moved earlier? If so, common check can be extracted and shared across this and ValidDiamond.
887	Merge these two : if (TF == FT && TT == FF) { if (! Reversable) return false; reverse ... }
905	Better move this out of line to improve readability.
910	I have not looked in details here. Is there existing code that can be refactored/reused by any chance?

Move closure to member function.

Renamed to ForkedDiamond, and add more documentation about how it differs from standard diamond.

lib/CodeGen/IfConversion.cpp
840	I renamed it ForkedDiamond, and added more comments about it.
863	No. It's just checking if the branch is analyzable.
873	It's short enough that I don't think it's worth factoring out. If there were more checks in common, then maybe, but there aren't.
910	The function that is the most similar is ScanInstructions. I think they're different enough that having them share code would be more confusing than helpful.

The code looks in pretty good shape now. I find the test coverage a little lacking -- how about adding some more tests (including a negative one)?

lib/CodeGen/IfConversion.cpp
91	nit: with a common tail that can be shared
500	Split out the independent fix with a test case if possible
1043	Document the parameters.
1043	Why can't this function be folded into the existing FeasibilityAnalysis with a new flag, hasCommonForkedTail (which defaults to false)?
1890	Why is this check not done for the forked case?

Cleanups in response to comments.
Removed DebugLocation propagation.

lib/CodeGen/IfConversion.cpp
500	I've removed it. Finding a test case is enough work that I just don't have time.
1890	Good catch.

Add negative test.

iteratee added a child revision: D22317: Codegen: Tail Merge: Be less aggressive with special cases..Jul 27 2016, 5:08 PM

Is there anything else that I need to do for this patch?

davidxl added inline comments.Jul 30 2016, 8:32 PM

lib/CodeGen/IfConversion.cpp
734	The variable names here do not seem to match the control flow graph drawn in the comment. Please make them consistent.
771	Clean up the comment -- last instruction of what?
772	Is this a good assumption to make? Can an assert be added to countDuplicatedInstructions?
804	This skip code pattern has appeared many times -- good candidate to extract into an inline function.
812	Why is this check not done outside of this function (in ValidForkedDiamond before countDuplicatedInstructions, as in ValidDiamond)?
817	Why is this not already computed?
820	FeasibilityAnalysis already checks the IsUnpredicable bit -- why is it still done here?
1184	This code looks almost exactly the same as the regular diamond case. Perhaps define a lambda function: auto DiamondFinder = [&](decltype(&IfConverter::ValidDiamond) Checker) { if (CanRevCond && (this->*Checker)(...) && ...) { .... } }; DiamondFinder(&IfConverter::ValidDiamond); DiamondFinder(&IfConverter::ValidForkedDiamond);

Refactor and comments.

lib/CodeGen/IfConversion.cpp
734	Which comment specifically? The names in the graph are member names of BBInfo, and are assumed to coincide. Here we can't make that assumption. The names are also logical: TT = TrueBBI.TrueBB, TF = TrueBBI.FalseBB, etc.
772	I've elaborated in the comment. The size is computed by ScanInstructions, and the duplicated portion is subtracted off, so there's no point in recomputing the size, we would get the same answer.
812	Because countDuplicatedInstructions adjusts the iterators so that we know exactly which instructions are duplicated. We only worry about the non-duplicated instructions that clobber the predicate info.
817	See above.
820	Same reason as above.
1184	It would take too many parameters, and the code would be less legible. I would have to pass in TrueBBICalc, FalseBBICalc, hasCommonTail, and then conditionalize the calls, because ValidDiamond takes a different number of arguments from ValidForkedDiamond. I don't think it would be cleaner.

Missed changes from last patch. (Refactor and comments.)

davidxl added inline comments.Aug 2 2016, 10:24 AM

lib/CodeGen/IfConversion.cpp
734	Ok -- the naming convention makes sense.
772	My point in this comment is that countDuplicatedInstructions does not document the exit state of TIE and FIE, so add assertions to make sure TIE and FIE point to what you expect to see (pointing to identical instructions before and non-identical ones after?)
812	Should this be done for ValidDiamond too -- only check the non-shared portion? Also, I don't think it is ideal to have code duplication like this. It looks like you should reuse ScanInstructions or part of it (by making it accept BIB and BIE)?
1184	I still think refactoring is better -- the main reason for the refactoring is to avoid code duplication, which is better in the longer term. For instance, there is no need to worry about fixing bugs in multiple places. Another point is that the Diamond and ForkedDiamond patterns are mutually exclusive, so there is no need to do the forked diamond check after the diamond check returns true. In other words, the code can be further simplified to: if (CanRevCond) { if (!DiamondFinder(...::ValidDiamond)) { DiamondFinder(...::ValidForkedDiamond); } } Also, it seems to me you don't need to introduce TrueBBICalc and FalseBBICalc. How about saving the initial values of TrueBBI and FalseBBI in ValidForkedDiamond, re-scanning the region of the BB, and updating TrueBBI and FalseBBI; if ValidForkedDiamond fails, restore the TrueBBI etc. values before returning.

More refactors and comments.

I split ScanInstructions in two, made the actual scan take iterator bounds, and removed RecaculateCostsAndClobbers.

lib/CodeGen/IfConversion.cpp
1184	I factored out the feasibility analysis. Take a look now.

More refactoring.

lgtm

When committing, I suggest you carve out the refactoring changes into one or more NFC patches (one for the debug skip, one for scanning, and one for the rest of the refactoring) before committing the functional change.

This revision is now accepted and ready to land.Aug 5 2016, 12:30 PM

iteratee added a parent revision: D22796: [ADT] Add make_scope_exit()..Aug 9 2016, 1:24 PM

Added bug fix from the revert.

Herald added a subscriber: nemanjai. · View Herald TranscriptAug 10 2016, 2:15 PM

iteratee closed this revision.Aug 11 2016, 2:21 PM

Committed in: r278287

I have a possible fix for the broken self hosting bot. I've split the fixed patch into 3: Fix (which can apply to top), Rescan diamonds, and Forked diamond. I'm going to re-commit all 3 and let the bots churn through them again.

Recommitted in r279670 and r279671

Revision Contents

Path

Size

lib/

CodeGen/

IfConversion.cpp

416 lines

test/

CodeGen/

PowerPC/

ifcvt-forked-bug-2016-08-08.ll

36 lines

Thumb2/

thumb2-ifcvt1.ll

47 lines

Diff 67595

lib/CodeGen/IfConversion.cpp

Show All 9 Lines
// This file implements the machine instruction level if-conversion pass, which		// This file implements the machine instruction level if-conversion pass, which
// tries to convert conditional branches into predicated instructions.		// tries to convert conditional branches into predicated instructions.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"		#include "llvm/CodeGen/Passes.h"
#include "BranchFolding.h"		#include "BranchFolding.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
		#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"		#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"		#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"		#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"		#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"		#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"		#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"		#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"		#include "llvm/CodeGen/MachineModuleInfo.h"
Show All 27 Lines
static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",		static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
cl::init(false), cl::Hidden);		cl::init(false), cl::Hidden);
static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",		static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
cl::init(false), cl::Hidden);		cl::init(false), cl::Hidden);
static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",		static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
cl::init(false), cl::Hidden);		cl::init(false), cl::Hidden);
static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",		static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);		cl::init(false), cl::Hidden);
		static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
		cl::init(false), cl::Hidden);
static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",		static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);

STATISTIC(NumSimple, "Number of simple if-conversions performed");		STATISTIC(NumSimple, "Number of simple if-conversions performed");
STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");		STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
STATISTIC(NumTriangle, "Number of triangle if-conversions performed");		STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");		STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");		STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");		STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");		STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
		STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
STATISTIC(NumIfConvBBs, "Number of if-converted blocks");		STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");		STATISTIC(NumDupBBs, "Number of duplicated blocks");
STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");		STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");

namespace {		namespace {
class IfConverter : public MachineFunctionPass {		class IfConverter : public MachineFunctionPass {
enum IfcvtKind {		enum IfcvtKind {
ICNotClassfied, // BB data valid, but not classified.		ICNotClassfied, // BB data valid, but not classified.
ICSimpleFalse, // Same as ICSimple, but on the false path.		ICSimpleFalse, // Same as ICSimple, but on the false path.
ICSimple, // BB is entry of an one split, no rejoin sub-CFG.		ICSimple, // BB is entry of an one split, no rejoin sub-CFG.
ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.		ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
ICTriangleRev, // Same as ICTriangle, but true path rev condition.		ICTriangleRev, // Same as ICTriangle, but true path rev condition.
ICTriangleFalse, // Same as ICTriangle, but on the false path.		ICTriangleFalse, // Same as ICTriangle, but on the false path.
ICTriangle, // BB is entry of a triangle sub-CFG.		ICTriangle, // BB is entry of a triangle sub-CFG.
ICDiamond // BB is entry of a diamond sub-CFG.		ICDiamond, // BB is entry of a diamond sub-CFG.
		ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a
		// common tail that can be shared.
		davidxlUnsubmitted Done Reply Inline Actions nit: with a common tail that can be shared davidxl: nit: with a common tail that can be shared
};		};

/// BBInfo - One per MachineBasicBlock, this is used to cache the result		/// BBInfo - One per MachineBasicBlock, this is used to cache the result
/// if-conversion feasibility analysis. This includes results from		/// if-conversion feasibility analysis. This includes results from
/// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its		/// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its
/// classification, and common tail block of its successors (if it's a		/// classification, and common tail block of its successors (if it's a
/// diamond shape), its size, whether it's predicable, and whether any		/// diamond shape), its size, whether it's predicable, and whether any
/// instruction can clobber the 'would-be' predicate.		/// instruction can clobber the 'would-be' predicate.
Show All 15 Lines	class IfConverter : public MachineFunctionPass {
/// BrCond - Conditions for end of block conditional branches.		/// BrCond - Conditions for end of block conditional branches.
/// Predicate - Predicate used in the BB.		/// Predicate - Predicate used in the BB.
struct BBInfo {		struct BBInfo {
bool IsDone : 1;		bool IsDone : 1;
bool IsBeingAnalyzed : 1;		bool IsBeingAnalyzed : 1;
bool IsAnalyzed : 1;		bool IsAnalyzed : 1;
bool IsEnqueued : 1;		bool IsEnqueued : 1;
bool IsBrAnalyzable : 1;		bool IsBrAnalyzable : 1;
		bool IsBrReversible : 1;
bool HasFallThrough : 1;		bool HasFallThrough : 1;
bool IsUnpredicable : 1;		bool IsUnpredicable : 1;
bool CannotBeCopied : 1;		bool CannotBeCopied : 1;
bool ClobbersPred : 1;		bool ClobbersPred : 1;
unsigned NonPredSize;		unsigned NonPredSize;
unsigned ExtraCost;		unsigned ExtraCost;
unsigned ExtraCost2;		unsigned ExtraCost2;
MachineBasicBlock *BB;		MachineBasicBlock *BB;
MachineBasicBlock *TrueBB;		MachineBasicBlock *TrueBB;
MachineBasicBlock *FalseBB;		MachineBasicBlock *FalseBB;
SmallVector<MachineOperand, 4> BrCond;		SmallVector<MachineOperand, 4> BrCond;
SmallVector<MachineOperand, 4> Predicate;		SmallVector<MachineOperand, 4> Predicate;
BBInfo() : IsDone(false), IsBeingAnalyzed(false),		BBInfo() : IsDone(false), IsBeingAnalyzed(false),
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),		IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
HasFallThrough(false), IsUnpredicable(false),		IsBrReversible(false), HasFallThrough(false),
CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),		IsUnpredicable(false), CannotBeCopied(false),
ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),		ClobbersPred(false), NonPredSize(0), ExtraCost(0),
		ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
FalseBB(nullptr) {}		FalseBB(nullptr) {}
};		};

/// IfcvtToken - Record information about pending if-conversions to attempt:		/// IfcvtToken - Record information about pending if-conversions to attempt:
/// BBI - Corresponding BBInfo.		/// BBI - Corresponding BBInfo.
/// Kind - Type of block. See IfcvtKind.		/// Kind - Type of block. See IfcvtKind.
/// NeedSubsumption - True if the to-be-predicated BB has already been		/// NeedSubsumption - True if the to-be-predicated BB has already been
/// predicated.		/// predicated.
/// NumDups - Number of instructions that would be duplicated due		/// NumDups - Number of instructions that would be duplicated due
/// to this if-conversion. (For diamonds, the number of		/// to this if-conversion. (For diamonds, the number of
/// identical instructions at the beginnings of both		/// identical instructions at the beginnings of both
/// paths).		/// paths).
/// NumDups2 - For diamonds, the number of identical instructions		/// NumDups2 - For diamonds, the number of identical instructions
/// at the ends of both paths.		/// at the ends of both paths.
struct IfcvtToken {		struct IfcvtToken {
BBInfo &BBI;		BBInfo &BBI;
IfcvtKind Kind;		IfcvtKind Kind;
bool NeedSubsumption;
unsigned NumDups;		unsigned NumDups;
unsigned NumDups2;		unsigned NumDups2;
IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)		bool NeedSubsumption : 1;
: BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}		bool TClobbersPred : 1;
		bool FClobbersPred : 1;
		IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
		bool tc = false, bool fc = false)
		: BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
		TClobbersPred(tc), FClobbersPred(fc) {}
};		};

/// BBAnalysis - Results of if-conversion feasibility analysis indexed by		/// BBAnalysis - Results of if-conversion feasibility analysis indexed by
/// basic block number.		/// basic block number.
std::vector<BBInfo> BBAnalysis;		std::vector<BBInfo> BBAnalysis;
TargetSchedModel SchedModel;		TargetSchedModel SchedModel;

const TargetLoweringBase *TLI;		const TargetLoweringBase *TLI;
Show All 33 Lines	namespace {
private:		private:
bool ReverseBranchCondition(BBInfo &BBI) const;		bool ReverseBranchCondition(BBInfo &BBI) const;
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,		bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
BranchProbability Prediction) const;		BranchProbability Prediction) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,		bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,		bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const;		BranchProbability Prediction) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,		bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;		unsigned &Dups1, unsigned &Dups2,
		BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
		bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
		unsigned &Dups1, unsigned &Dups2,
		BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
void AnalyzeBranches(BBInfo &BBI);		void AnalyzeBranches(BBInfo &BBI);
void ScanInstructions(BBInfo &BBI,		void ScanInstructions(BBInfo &BBI,
MachineBasicBlock::iterator &Begin,		MachineBasicBlock::iterator &Begin,
MachineBasicBlock::iterator &End) const;		MachineBasicBlock::iterator &End) const;
		bool RescanInstructions(
		MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
		MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
		BBInfo &TrueBBI, BBInfo &FalseBBI) const;
void AnalyzeBlock(MachineBasicBlock *MBB,		void AnalyzeBlock(MachineBasicBlock *MBB,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);		std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,		bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
bool isTriangle = false, bool RevBranch = false);		bool isTriangle = false, bool RevBranch = false,
		bool hasCommonTail = false);
void AnalyzeBlocks(MachineFunction &MF,		void AnalyzeBlocks(MachineFunction &MF,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);		std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
void InvalidatePreds(MachineBasicBlock *BB);		void InvalidatePreds(MachineBasicBlock *BB);
void RemoveExtraEdges(BBInfo &BBI);		void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);		bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);		bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
		bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
		unsigned NumDups1, unsigned NumDups2,
		bool TClobbersPred, bool FClobbersPred,
		bool RemoveTrueBranch, bool RemoveFalseBranch,
		bool MergeAddEdges);
bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,		bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
unsigned NumDups1, unsigned NumDups2);		unsigned NumDups1, unsigned NumDups2);
		bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
		unsigned NumDups1, unsigned NumDups2,
		bool TClobbers, bool FClobbers);
void PredicateBlock(BBInfo &BBI,		void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,		MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,		SmallVectorImpl<MachineOperand> &Cond,
SmallSet<unsigned, 4> *LaterRedefs = nullptr);		SmallSet<unsigned, 4> *LaterRedefs = nullptr);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,		void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,		SmallVectorImpl<MachineOperand> &Cond,
bool IgnoreBr = false);		bool IgnoreBr = false);
void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);		void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
▲ Show 20 Lines • Show All 175 Lines • ▼ Show 20 Lines	while (!Tokens.empty()) {
DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"		DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
<< BBI.TrueBB->getNumber() << ",F:"		<< BBI.TrueBB->getNumber() << ",F:"
<< BBI.FalseBB->getNumber() << ") ");		<< BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);		RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");		DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) ++NumDiamonds;		if (RetVal) ++NumDiamonds;
break;		break;
}		}
		case ICForkedDiamond: {
		if (DisableForkedDiamond) break;
		DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
		<< BBI.BB->getNumber() << " (T:"
		<< BBI.TrueBB->getNumber() << ",F:"
		<< BBI.FalseBB->getNumber() << ") ");
		RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
		Token->TClobbersPred,
		Token->FClobbersPred);
		DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
		if (RetVal) ++NumForkedDiamonds;
		break;
		}
}		}

Change \|= RetVal;		Change \|= RetVal;

NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +		NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
NumTriangleFalse + NumTriangleFRev + NumDiamonds;		NumTriangleFalse + NumTriangleFRev + NumDiamonds;
if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)		if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
break;		break;
Show All 29 Lines	static MachineBasicBlock findFalseBlock(MachineBasicBlock BB,
}		}
return nullptr;		return nullptr;
}		}

/// ReverseBranchCondition - Reverse the condition of the end of the block		/// ReverseBranchCondition - Reverse the condition of the end of the block
/// branch. Swap block's 'true' and 'false' successors.		/// branch. Swap block's 'true' and 'false' successors.
bool IfConverter::ReverseBranchCondition(BBInfo &BBI) const {		bool IfConverter::ReverseBranchCondition(BBInfo &BBI) const {
DebugLoc dl; // FIXME: this is nowhere		DebugLoc dl; // FIXME: this is nowhere
if (!TII->ReverseBranchCondition(BBI.BrCond)) {		if (!TII->ReverseBranchCondition(BBI.BrCond)) {
		davidxlUnsubmitted Not Done Reply Inline Actions Split out the independent fix with a test case if possible davidxl: Split out the independent fix with a test case if possible
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions I've removed it. Finding a test case is enough work that I just don't have time. iteratee: I've removed it. Finding a test case is enough work that I just don't have time.
TII->RemoveBranch(*BBI.BB);		TII->RemoveBranch(*BBI.BB);
TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);		TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
std::swap(BBI.TrueBB, BBI.FalseBB);		std::swap(BBI.TrueBB, BBI.FalseBB);
return true;		return true;
}		}
return false;		return false;
}		}

▲ Show 20 Lines • Show All 139 Lines • ▼ Show 20 Lines	static void countDuplicatedInstructions(
// If both blocks are returning don't skip the branches, since they will		// If both blocks are returning don't skip the branches, since they will
// likely be both identical return instructions. In such cases the return		// likely be both identical return instructions. In such cases the return
// can be left unpredicated.		// can be left unpredicated.
// Check for already containing all of the block.		// Check for already containing all of the block.
if (TIB == TIE \|\| FIB == FIE)		if (TIB == TIE \|\| FIB == FIE)
return;		return;
--TIE;		--TIE;
--FIE;		--FIE;
		// Upon exit TIE and FIE will both point at the last non-shared instruction,
		// they need to be moved forward to point past the last non-shared
		// instruction.
		auto IncrementEndIteratorsOnExit = make_scope_exit([&]() {
		++TIE; ++FIE;
		});

if (!TBB.succ_empty() \|\| !FBB.succ_empty()) {		if (!TBB.succ_empty() \|\| !FBB.succ_empty()) {
if (SkipConditionalBranches) {		if (SkipConditionalBranches) {
while (TIE != TIB && TIE->isBranch())		while (TIE != TIB && TIE->isBranch())
--TIE;		--TIE;
while (FIE != FIB && FIE->isBranch())		while (FIE != FIB && FIE->isBranch())
--FIE;		--FIE;
} else {		} else {
while (TIE != TIB && TIE->isUnconditionalBranch())		while (TIE != TIB && TIE->isUnconditionalBranch())
Show All 21 Lines	while (TIE != TIB && FIE != FIB) {
// still don't want to count them.		// still don't want to count them.
if (SkipConditionalBranches \|\| !TIE->isBranch())		if (SkipConditionalBranches \|\| !TIE->isBranch())
++Dups2;		++Dups2;
--TIE;		--TIE;
--FIE;		--FIE;
}		}
}		}

		/// RescanInstructions - Run ScanInstructions on a pair of blocks.
		/// @param TIB - True Iterator Begin, points to first non-shared instruction
		/// @param FIB - False Iterator Begin, points to first non-shared instruction
		/// @param TIE - True Iterator End, points past last non-shared instruction
		/// @param FIE - False Iterator End, points past last non-shared instruction
		/// @param TrueBBI - BBInfo to update for the true block.
		/// @param FalseBBI - BBInfo to update for the false block.
		/// @returns - false if either block cannot be predicated or if both blocks end
		/// with a predicate-clobbering instruction.
		///
		/// Note: ScanInstructions takes its iterators by reference and increments
		/// the begin iterator while it walks the range, so TIB and FIB are modified
		/// by this call and should not be assumed to still point at the first
		/// non-shared instruction afterwards.
		bool IfConverter::RescanInstructions(
		MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
		MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
		BBInfo &TrueBBI, BBInfo &FalseBBI) const {
		// Scan the true side first; if it is unpredicable the false side is never
		// scanned and FalseBBI is left untouched.
		ScanInstructions(TrueBBI, TIB, TIE);
		if (TrueBBI.IsUnpredicable)
		return false;
		ScanInstructions(FalseBBI, FIB, FIE);
		if (FalseBBI.IsUnpredicable)
		return false;
		// Only the case where BOTH sides clobber the predicate is rejected here; a
		// single clobbering side is accepted.
		if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
		return false;
		return true;
		}

		/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
		/// with their common predecessor) form a diamond if a common tail block is
		/// extracted.
		/// While not strictly a diamond, this pattern would form a diamond if
		/// tail-merging had merged the shared tails.
		///           EBB
		///         _/   \_
		///         |     |
		///        TBB   FBB
		///        /  \ /   \
		///  FalseBB TrueBB FalseBB
		davidxlUnsubmitted Not Done Reply Inline Actions The variable names here do not seem to match the control flow graph drawn in the comment. Please make them consistent. davidxl: The variable names here do not seem to match the control flow graph drawn in the comment.
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions Which comment specifically? The names in the graph are member names of BBInfo, and are assumed to coincide. Here we can't make that assumption. The names are also logical: TT = TrueBBI.TrueBB, TF = TrueBBI.FalseBB, etc. iteratee: Which comment specifically? The names in the graph are member names of BBInfo, and are assumed…
		davidxlUnsubmitted Not Done Reply Inline Actions Ok -- the naming convention makes sense. davidxl: Ok -- the naming convention makes sense.
		/// Currently only handles analyzable branches.
		/// Specifically excludes actual diamonds to avoid overlap.
		bool IfConverter::ValidForkedDiamond(
		BBInfo &TrueBBI, BBInfo &FalseBBI,
		unsigned &Dups1, unsigned &Dups2,
		BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
		Dups1 = Dups2 = 0;
		if (TrueBBI.IsBeingAnalyzed \|\| TrueBBI.IsDone \|\|
		FalseBBI.IsBeingAnalyzed \|\| FalseBBI.IsDone)
		return false;

		if (!TrueBBI.IsBrAnalyzable \|\| !FalseBBI.IsBrAnalyzable)
		return false;
		// Don't IfConvert blocks that can't be folded into their predecessor.
		if (TrueBBI.BB->pred_size() > 1 \|\| FalseBBI.BB->pred_size() > 1)
		return false;

		// This function is specifically looking for conditional tails, as
		// unconditional tails are already handled by the standard diamond case.
		if (TrueBBI.BrCond.size() == 0 \|\|
		FalseBBI.BrCond.size() == 0)
		return false;

		MachineBasicBlock *TT = TrueBBI.TrueBB;
		MachineBasicBlock *TF = TrueBBI.FalseBB;
		MachineBasicBlock *FT = FalseBBI.TrueBB;
		MachineBasicBlock *FF = FalseBBI.FalseBB;

		if (!TT)
		TT = getNextBlock(TrueBBI.BB);
		if (!TF)
		TF = getNextBlock(TrueBBI.BB);
		if (!FT)
		FT = getNextBlock(FalseBBI.BB);
		if (!FF)
		FF = getNextBlock(FalseBBI.BB);

		davidxlUnsubmitted Not Done Reply Inline Actions Clean up the comment -- last instruction of what? davidxl: Clean up the comment -- last instruction of what?
		if (!TT \|\| !TF)
		davidxlUnsubmitted Not Done Reply Inline Actions Is this a good assumption to make? Can an assert be added to countDuplicatedInstructions? davidxl: Is this a good assumption to make? Can an assert be added to countDuplicatedInstructions?
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions I've elaborated in the comment. The size is computed by ScanInstructions, and the duplicated portion is subtracted off, so there's no point in recomputing the size, we would get the same answer. iteratee: I've elaborated in the comment. The size is computed by ScanInstructions, and the duplicated…
		davidxlUnsubmitted Not Done Reply Inline Actions My question in this comment is that countDuplicatedInstructions does not document the exit state of TIE and FIE, so add assertions to make sure TIE and FIE point to what you expect to see (pointing to identical instructions before and non-identical ones after?) davidxl: My question in this comment is that countDuplicatedInstructions does not document the exit state of…
		return false;

		// Check successors. If they don't match, bail.
		if (!((TT == FT && TF == FF) \|\| (TF == FT && TT == FF)))
		return false;

		bool FalseReversed = false;
		if (TF == FT && TT == FF) {
		// If the branches are opposing, but we can't reverse, don't do it.
		if (!FalseBBI.IsBrReversible)
		return false;
		FalseReversed = true;
		ReverseBranchCondition(FalseBBI);
		}
		auto UnReverseOnExit = make_scope_exit([&]() {
		if (FalseReversed)
		ReverseBranchCondition(FalseBBI);
		});

		// Count duplicate instructions at the beginning of the true and false blocks.
		MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
		MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
		MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
		MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
		countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
		TrueBBI.BB, FalseBBI.BB,
		/* SkipConditionalBranches */ false);

		TrueBBICalc.BB = TrueBBI.BB;
		FalseBBICalc.BB = FalseBBI.BB;
		if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
		return false;
		davidxlUnsubmitted Done Reply Inline Actions This skip code pattern has appeared many times -- good candidate to extract into an inline function. davidxl: This skip code pattern has appeared many times -- good candidate to extract into an inline…
		// The size is used to decide whether to if-convert, and the shared portions
		// are subtracted off. Because of the subtraction, we just use the size that
		// was calculated by the original ScanInstructions, as it is correct.
		TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
		FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
		return true;
		}

		davidxlUnsubmitted Not Done Reply Inline Actions Why is this check not done outside of this function (in ValidForkedDiamond, before countDuplicatedInstructions, as in ValidDiamond)? davidxl: Why is this check not done outside of this function (in ValidForkedDiamond, before…
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions Because countDuplicatedInstructions adjusts the iterators so that we know exactly which instructions are duplicated. We only worry about the non-duplicated instructions that clobber the predicate info. iteratee: Because countDuplicatedInstructions adjusts the iterators so that we know exactly which…
		davidxlUnsubmitted Done Reply Inline Actions Should this be done for ValidDiamond too -- only check the non-shared portion? Also I don't think it is ideal to have code duplication like this. Looks like you should re-use ScanInstructions or part of it (by making it accept BIB and BIE)? davidxl: Should this be done for ValidDiamond too -- only check the non-shared portion? Also I don't…
/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along		/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
/// with their common predecessor) forms a valid diamond shape for ifcvt.		/// with their common predecessor) forms a valid diamond shape for ifcvt.
bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,		bool IfConverter::ValidDiamond(
unsigned &Dups1, unsigned &Dups2) const {		BBInfo &TrueBBI, BBInfo &FalseBBI,
		unsigned &Dups1, unsigned &Dups2,
		davidxlUnsubmitted Not Done Reply Inline Actions Why is this not already computed? davidxl: Why is this not already computed?
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions See above. iteratee: See above.
		BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
Dups1 = Dups2 = 0;		Dups1 = Dups2 = 0;
if (TrueBBI.IsBeingAnalyzed \|\| TrueBBI.IsDone \|\|		if (TrueBBI.IsBeingAnalyzed \|\| TrueBBI.IsDone \|\|
		davidxlUnsubmitted Not Done Reply Inline Actions FeasibilityAnalysis already checks the IsUnpredicable bit -- why is it still done here? davidxl: FeasibilityAnalysis already checks the IsUnpredicable bit -- why is it still done here?
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions Same reason as above. iteratee: Same reason as above.
FalseBBI.IsBeingAnalyzed \|\| FalseBBI.IsDone)		FalseBBI.IsBeingAnalyzed \|\| FalseBBI.IsDone)
return false;		return false;

MachineBasicBlock *TT = TrueBBI.TrueBB;		MachineBasicBlock *TT = TrueBBI.TrueBB;
MachineBasicBlock *FT = FalseBBI.TrueBB;		MachineBasicBlock *FT = FalseBBI.TrueBB;

if (!TT && blockAlwaysFallThrough(TrueBBI))		if (!TT && blockAlwaysFallThrough(TrueBBI))
TT = getNextBlock(TrueBBI.BB);		TT = getNextBlock(TrueBBI.BB);
if (!FT && blockAlwaysFallThrough(FalseBBI))		if (!FT && blockAlwaysFallThrough(FalseBBI))
FT = getNextBlock(FalseBBI.BB);		FT = getNextBlock(FalseBBI.BB);
if (TT != FT)		if (TT != FT)
return false;		return false;
if (!TT && (TrueBBI.IsBrAnalyzable \|\| FalseBBI.IsBrAnalyzable))		if (!TT && (TrueBBI.IsBrAnalyzable \|\| FalseBBI.IsBrAnalyzable))
return false;		return false;
if (TrueBBI.BB->pred_size() > 1 \|\| FalseBBI.BB->pred_size() > 1)		if (TrueBBI.BB->pred_size() > 1 \|\| FalseBBI.BB->pred_size() > 1)
return false;		return false;

// FIXME: Allow true block to have an early exit?		// FIXME: Allow true block to have an early exit?
if (TrueBBI.FalseBB \|\| FalseBBI.FalseBB \|\|		if (TrueBBI.FalseBB \|\| FalseBBI.FalseBB)
(TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
return false;		return false;
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions I renamed it ForkedDiamond, and added more comments about it. iteratee: I renamed it ForkedDiamond, and added more comments about it.

// Count duplicate instructions at the beginning and end of the true and		// Count duplicate instructions at the beginning and end of the true and
// false blocks.		// false blocks.
MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();		MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();		MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
		davidxlUnsubmitted Not Done Reply Inline Actions Document the difference between this pattern and ValidDiamond? Also, since there is no common tail shared between truebb and falsebb, the shape is not really a 'Diamond'. Perhaps name it 'ValidDiamondWithTailCommoned' to indicate that the shape will be a diamond if the tail is commoned? davidxl: Document the difference between this pattern and ValidDiamond? Also, since there is no common…
MachineBasicBlock::iterator TIE = TrueBBI.BB->end();		MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
MachineBasicBlock::iterator FIE = FalseBBI.BB->end();		MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,		countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
TrueBBI.BB, FalseBBI.BB,		TrueBBI.BB, FalseBBI.BB,
/* SkipConditionalBranches */ true);		/* SkipConditionalBranches */ true);

		TrueBBICalc.BB = TrueBBI.BB;
		FalseBBICalc.BB = FalseBBI.BB;
		if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
		return false;
		// The size is used to decide whether to if-convert, and the shared portions
		// are subtracted off. Because of the subtraction, we just use the size that
		// was calculated by the original ScanInstructions, as it is correct.
		TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
		FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
return true;		return true;
}		}

		davidxlUnsubmitted Done Reply Inline Actions Is a 'fallthrough' check needed here? davidxl: Is a 'fallthrough' check needed here?
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions No. It's just checking if the branch is analyzable. iteratee: No. It's just checking if the branch is analyzable.
/// AnalyzeBranches - Look at the branches at the end of a block to determine if		/// AnalyzeBranches - Look at the branches at the end of a block to determine if
/// the block is predicable.		/// the block is predicable.
void IfConverter::AnalyzeBranches(BBInfo &BBI) {		void IfConverter::AnalyzeBranches(BBInfo &BBI) {
if (BBI.IsDone)		if (BBI.IsDone)
return;		return;

BBI.TrueBB = BBI.FalseBB = nullptr;		BBI.TrueBB = BBI.FalseBB = nullptr;
BBI.BrCond.clear();		BBI.BrCond.clear();
BBI.IsBrAnalyzable =		BBI.IsBrAnalyzable =
!TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);		!TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
		davidxlUnsubmitted Done Reply Inline Actions Can this check be moved earlier? If so, common check can be extracted and shared across this and ValidDiamond. davidxl: Can this check be moved earlier? If so, common check can be extracted and shared across this…
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions It's short enough that I don't think it's worth factoring out. If there were more checks in common, then maybe, but there aren't. iteratee: It's short enough that I don't think it's worth factoring out. If there were more checks in…
		SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
		BBI.IsBrReversible = (RevCond.size() == 0) \|\|
		!TII->ReverseBranchCondition(RevCond);
BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;		BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;

if (BBI.BrCond.size()) {		if (BBI.BrCond.size()) {
// No false branch. This BB must end with a conditional branch and a		// No false branch. This BB must end with a conditional branch and a
// fallthrough.		// fallthrough.
if (!BBI.FalseBB)		if (!BBI.FalseBB)
BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);		BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
if (!BBI.FalseBB) {		if (!BBI.FalseBB) {
// Malformed bcc? True and false blocks are the same?		// Malformed bcc? True and false blocks are the same?
BBI.IsUnpredicable = true;		BBI.IsUnpredicable = true;
}		}
		davidxlUnsubmitted Done Reply Inline Actions Merge these two: if (TF == FT && TT == FF) { if (!Reversible) return false; reverse ... } davidxl: Merge these two: if (TF == FT && TT == FF) { if (!Reversible) return…
}		}
}		}

/// ScanInstructions - Scan all the instructions in the block to determine if		/// ScanInstructions - Scan all the instructions in the block to determine if
/// the block is predicable. In most cases, that means all the instructions		/// the block is predicable. In most cases, that means all the instructions
/// in the block are isPredicable(). Also checks if the block contains any		/// in the block are isPredicable(). Also checks if the block contains any
/// instruction which can clobber a predicate (e.g. condition code register).		/// instruction which can clobber a predicate (e.g. condition code register).
/// If so, the block is not predicable unless it's the last instruction.		/// If so, the block is not predicable unless it's the last instruction.
void IfConverter::ScanInstructions(BBInfo &BBI,		void IfConverter::ScanInstructions(BBInfo &BBI,
MachineBasicBlock::iterator &Begin,		MachineBasicBlock::iterator &Begin,
MachineBasicBlock::iterator &End) const {		MachineBasicBlock::iterator &End) const {
if (BBI.IsDone \|\| BBI.IsUnpredicable)		if (BBI.IsDone \|\| BBI.IsUnpredicable)
return;		return;

bool AlreadyPredicated = !BBI.Predicate.empty();		bool AlreadyPredicated = !BBI.Predicate.empty();

BBI.NonPredSize = 0;		BBI.NonPredSize = 0;
BBI.ExtraCost = 0;		BBI.ExtraCost = 0;
		davidxlUnsubmitted Done Reply Inline Actions Better move this out of line to improve readability. davidxl: Better move this out of line to improve readability.
BBI.ExtraCost2 = 0;		BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;		BBI.ClobbersPred = false;
for (; Begin != End; ++Begin) {		for (; Begin != End; ++Begin) {
auto &MI = *Begin;		auto &MI = *Begin;
if (MI.isDebugValue())		if (MI.isDebugValue())
		davidxlUnsubmitted Done Reply Inline Actions I have not looked in details here. Is there existing code that can be refactored/reused by any chance? davidxl: I have not looked in details here. Is there existing code that can be refactored/reused by any…
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions The function that is the most similar is ScanInstructions. I think they're different enough that having them share code would be more confusing than helpful. iteratee: The function that is the most similar is ScanInstructions. I think they're different enough…
continue;		continue;

// It's unsafe to duplicate convergent instructions in this context, so set		// It's unsafe to duplicate convergent instructions in this context, so set
// BBI.CannotBeCopied to true if MI is convergent. To see why, consider the		// BBI.CannotBeCopied to true if MI is convergent. To see why, consider the
// following CFG, which is subject to our "simple" transformation.		// following CFG, which is subject to our "simple" transformation.
//		//
// BB0 // if (c1) goto BB1; else goto BB2;		// BB0 // if (c1) goto BB1; else goto BB2;
// / \		// / \
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines	if (!TII->isPredicable(MI)) {
BBI.IsUnpredicable = true;		BBI.IsUnpredicable = true;
return;		return;
}		}
}		}
}		}

/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be		/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
/// predicated by the specified predicate.		/// predicated by the specified predicate.
		/// @param BBI BBInfo for the block to check
		/// @param Pred Predicate array for the branch that leads to BBI
		/// @param isTriangle true if the Analysis is for a triangle
		/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false
		/// case)
		/// @param hasCommonTail true if BBI shares a tail with a sibling block that
		/// contains any instruction that would make the block unpredicable.
bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,		bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
SmallVectorImpl<MachineOperand> &Pred,		SmallVectorImpl<MachineOperand> &Pred,
bool isTriangle, bool RevBranch) {		bool isTriangle, bool RevBranch,
		bool hasCommonTail) {
// If the block is dead or unpredicable, then it cannot be predicated.		// If the block is dead or unpredicable, then it cannot be predicated.
if (BBI.IsDone \|\| BBI.IsUnpredicable)		// Two blocks may share a common unpredicable tail, but this doesn't prevent
		// them from being if-converted. The non-shared portion is assumed to have
		// been checked
		if (BBI.IsDone \|\| (BBI.IsUnpredicable && !hasCommonTail))
return false;		return false;

// If it is already predicated but we couldn't analyze its terminator, the		// If it is already predicated but we couldn't analyze its terminator, the
// latter might fallthrough, but we can't determine where to.		// latter might fallthrough, but we can't determine where to.
// Conservatively avoid if-converting again.		// Conservatively avoid if-converting again.
if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)		if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
return false;		return false;

// If it is already predicated, check if the new predicate subsumes		// If it is already predicated, check if the new predicate subsumes
// its predicate.		// its predicate.
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))		if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
return false;		return false;

if (BBI.BrCond.size()) {		if (!hasCommonTail && BBI.BrCond.size()) {
if (!isTriangle)		if (!isTriangle)
return false;		return false;

// Test predicate subsumption.		// Test predicate subsumption.
SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());		SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());		SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
if (RevBranch) {		if (RevBranch) {
if (TII->ReverseBranchCondition(Cond))		if (TII->ReverseBranchCondition(Cond))
return false;		return false;
}		}
if (TII->ReverseBranchCondition(RevPred) \|\|		if (TII->ReverseBranchCondition(RevPred) \|\|
!TII->SubsumesPredicate(Cond, RevPred))		!TII->SubsumesPredicate(Cond, RevPred))
return false;		return false;
}		}

return true;		return true;
}		}

/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from		/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
/// the specified block. Record its successors and whether it looks like an		/// the specified block. Record its successors and whether it looks like an
/// if-conversion candidate.		/// if-conversion candidate.
void IfConverter::AnalyzeBlock(		void IfConverter::AnalyzeBlock(
		davidxlUnsubmitted Done Reply Inline Actions Document the parameters. davidxl: Document the parameters.
		davidxlUnsubmitted Done Reply Inline Actions Why can't this function be folded into the existing FeasibilityAnalysis with a new flag: hasCommonForkedTail (which defaults to false)? davidxl: Why can't this function be folded into the existing FeasibilityAnalysis with a new flag…
MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {		MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
struct BBState {		struct BBState {
BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}		BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}
MachineBasicBlock *MBB;		MachineBasicBlock *MBB;

/// This flag is true if MBB's successors have been analyzed.		/// This flag is true if MBB's successors have been analyzed.
bool SuccsAnalyzed;		bool SuccsAnalyzed;
};		};
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines	while (!BBStack.empty()) {
unsigned Dups = 0;		unsigned Dups = 0;
unsigned Dups2 = 0;		unsigned Dups2 = 0;
bool TNeedSub = !TrueBBI.Predicate.empty();		bool TNeedSub = !TrueBBI.Predicate.empty();
bool FNeedSub = !FalseBBI.Predicate.empty();		bool FNeedSub = !FalseBBI.Predicate.empty();
bool Enqueued = false;		bool Enqueued = false;

BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);		BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);

if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&		if (CanRevCond) {
MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +		BBInfo TrueBBICalc, FalseBBICalc;
TrueBBI.ExtraCost), TrueBBI.ExtraCost2,		auto feasibleDiamond = [&]() {
*FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +		return (
FalseBBI.ExtraCost),FalseBBI.ExtraCost2,		MeetIfcvtSizeLimit(
		*TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
		TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
		*FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
		FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
Prediction) &&		Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&		FeasibilityAnalysis(TrueBBI, BBI.BrCond,
FeasibilityAnalysis(FalseBBI, RevCond)) {		/* IsTriangle / false, / RevCond */ false,
		/* hasCommonTail */ true) &&
		FeasibilityAnalysis(FalseBBI, RevCond,
		/* IsTriangle */ false, /* RevCond */ false,
		/* hasCommonTail */ true));
		};

		if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
		TrueBBICalc, FalseBBICalc)) {
		if (feasibleDiamond()) {
// Diamond:		// Diamond:
// EBB		// EBB
// / \_		// / \_
// \| \|		// \| \|
// TBB FBB		// TBB FBB
// \ /		// \ /
// TailBB		// TailBB
// Note TailBB can be empty.		// Note TailBB can be empty.
Tokens.push_back(llvm::make_unique<IfcvtToken>(		Tokens.push_back(llvm::make_unique<IfcvtToken>(
BBI, ICDiamond, TNeedSub \| FNeedSub, Dups, Dups2));		BBI, ICDiamond, TNeedSub \| FNeedSub, Dups, Dups2));
Enqueued = true;		Enqueued = true;
}		}
		} else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
		TrueBBICalc, FalseBBICalc)) {
		if (feasibleDiamond()) {
		// ForkedDiamond:
		// if TBB and FBB have a common tail that includes their conditional
		// branch instructions, then we can If Convert this pattern.
		// EBB
		// _/ \_
		// \| \|
		// TBB FBB
		// / \ / \
		// FalseBB TrueBB FalseBB
		//
		Tokens.push_back(llvm::make_unique<IfcvtToken>(
		BBI, ICForkedDiamond, TNeedSub \| FNeedSub, Dups, Dups2,
		(bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
		Enqueued = true;
		}
		}
		}

if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&		if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,		MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
		davidxlUnsubmitted Not Done Reply Inline Actions This code looks almost exactly the same as the regular diamond case. Perhaps define a lambda function: auto DiamondFinder = [&](decltype(&IfConverter::ValidDiamond) Checker) { if (CanRevCond && (this->Checker(..) && ...) { .... } }; DiamondFinder(&IfConverter::ValidDiamond); DiamondFinder(&IfConverter::ValidForkedDiamond); davidxl: This code looks almost exactly the same as the regular diamond case. Perhaps define a lambda…
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions It would take too many parameters, and the code would be less legible. I would have to pass in TrueBBICalc, FalseBBICalc, hasCommonTail, and then conditionalize the calls, because ValidDiamond takes a different number of arguments from ValidForkedDiamond. I don't think it would be cleaner. iteratee: It would take too many parameters, and the code would be less legible. I would have to pass in…
		davidxlUnsubmitted Not Done Reply Inline Actions I still think refactoring is better -- the main reason for doing the refactoring is to avoid code duplication, which is better in the longer term. For instance, there is no need to worry about fixing bugs in multiple different places. Another point is that the Diamond and ForkedDiamond patterns are exclusive, so there is no need to do the forked diamond check after the diamond check returns true. In other words, the code can be further simplified to: if (CanRevCond) { if (!DiamondFinder(...::ValidDiamond)) { DiamondFinder(... ::ValidForkedDiamond); } Also it seems to me you don't need to introduce TrueBBICalc and FalseBBICalc. How about, in ValidForkedDiamond: save the initial values of TrueBBI and FalseBBI; re-scan the region of each BB and update TrueBBI and FalseBBI; if ValidForkedDiamond fails, restore the TrueBBI etc. values before returning. davidxl: I still think refactoring is better -- the main reason for doing the refactoring is to avoid…
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions I factored out the feasibility analysis. Take a look now. iteratee: I factored out the feasibility analysis. Take a look now.
TrueBBI.ExtraCost2, Prediction) &&		TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {		FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
// Triangle:		// Triangle:
// EBB		// EBB
// \| \_		// \| \_
// \| \|		// \| \|
// \| TBB		// \| TBB
// \| /		// \| /
▲ Show 20 Lines • Show All 432 Lines • ▼ Show 20 Lines	bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
CvtBBI->IsDone = true;		CvtBBI->IsDone = true;
if (FalseBBDead)		if (FalseBBDead)
NextBBI->IsDone = true;		NextBBI->IsDone = true;

// FIXME: Must maintain LiveIns.		// FIXME: Must maintain LiveIns.
return true;		return true;
}		}

/// IfConvertDiamond - If convert a diamond sub-CFG.		/// IfConvertDiamondCommon - Common code shared between diamond conversions.
///		/// BBI, TrueBBI, and FalseBBI form the diamond shape.
bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,		/// NumDups1 - number of shared instructions at the beginning of TrueBBI and
unsigned NumDups1, unsigned NumDups2) {		/// FalseBBI
BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];		/// NumDups2 - number of shared instructions at the end of TrueBBI and FalseBBI
BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];		/// RemoveTrueBranch - Remove the branch of the true block before predicating
MachineBasicBlock *TailBB = TrueBBI.TrueBB;		/// Only false for unanalyzable fallthrough cases.
// True block must fall through or end with an unanalyzable terminator.		/// RemoveFalseBranch - Remove the branch of the false block before predicating
if (!TailBB) {		/// Only false for unanalyzable fallthrough cases.
if (blockAlwaysFallThrough(TrueBBI))		/// MergeAddEdges - Add successor edges when merging blocks. Only false for
TailBB = FalseBBI.TrueBB;		/// unanalyzable fallthrough
assert((TailBB \|\| !TrueBBI.IsBrAnalyzable) && "Unexpected!");		bool IfConverter::IfConvertDiamondCommon(
}		BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
		unsigned NumDups1, unsigned NumDups2,
		bool TClobbersPred, bool FClobbersPred,
		bool RemoveTrueBranch, bool RemoveFalseBranch,
		bool MergeAddEdges) {

if (TrueBBI.IsDone \|\| FalseBBI.IsDone \|\|		if (TrueBBI.IsDone \|\| FalseBBI.IsDone \|\|
TrueBBI.BB->pred_size() > 1 \|\|		TrueBBI.BB->pred_size() > 1 \|\| FalseBBI.BB->pred_size() > 1) {
FalseBBI.BB->pred_size() > 1) {
// Something has changed. It's no longer safe to predicate these blocks.		// Something has changed. It's no longer safe to predicate these blocks.
BBI.IsAnalyzed = false;		BBI.IsAnalyzed = false;
TrueBBI.IsAnalyzed = false;		TrueBBI.IsAnalyzed = false;
FalseBBI.IsAnalyzed = false;		FalseBBI.IsAnalyzed = false;
return false;		return false;
}		}

if (TrueBBI.BB->hasAddressTaken() \|\| FalseBBI.BB->hasAddressTaken())		if (TrueBBI.BB->hasAddressTaken() \|\| FalseBBI.BB->hasAddressTaken())
// Conservatively abort if-conversion if either BB has its address taken.		// Conservatively abort if-conversion if either BB has its address taken.
return false;		return false;

// Put the predicated instructions from the 'true' block before the		// Put the predicated instructions from the 'true' block before the
// instructions from the 'false' block, unless the true block would clobber		// instructions from the 'false' block, unless the true block would clobber
// the predicate, in which case, do the opposite.		// the predicate, in which case, do the opposite.
BBInfo *BBI1 = &TrueBBI;		BBInfo *BBI1 = &TrueBBI;
BBInfo *BBI2 = &FalseBBI;		BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());		SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
if (TII->ReverseBranchCondition(RevCond))		if (TII->ReverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");		llvm_unreachable("Unable to reverse branch condition!");
SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;		SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
SmallVector<MachineOperand, 4> *Cond2 = &RevCond;		SmallVector<MachineOperand, 4> *Cond2 = &RevCond;

// Figure out the more profitable ordering.		// Figure out the more profitable ordering.
bool DoSwap = false;		bool DoSwap = false;
if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)		if (TClobbersPred && !FClobbersPred)
DoSwap = true;		DoSwap = true;
else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {		else if (TClobbersPred == FClobbersPred) {
if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)		if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
DoSwap = true;		DoSwap = true;
}		}
if (DoSwap) {		if (DoSwap) {
std::swap(BBI1, BBI2);		std::swap(BBI1, BBI2);
std::swap(Cond1, Cond2);		std::swap(Cond1, Cond2);
		std::swap(RemoveTrueBranch, RemoveFalseBranch);
}		}

// Remove the conditional branch from entry to the blocks.		// Remove the conditional branch from entry to the blocks.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);		BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);

// Initialize liveins to the first BB. These are potentially redefined by		// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.		// predicated instructions.
Redefs.init(TRI);		Redefs.init(TRI);
Show All 30 Lines	bool IfConverter::IfConvertDiamondCommon(
for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;		for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
++I) {		++I) {
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;		SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
Redefs.stepForward(*I, IgnoredClobbers);		Redefs.stepForward(*I, IgnoredClobbers);
}		}
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);		BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);		BBI2->BB->erase(BBI2->BB->begin(), DI2);

// Remove branch from the 'true' block, unless it was not analyzable.		if (RemoveTrueBranch)
// Non-analyzable branches need to be preserved, since in such cases,
// the CFG structure is not an actual diamond (the join block may not
// be present).
if (BBI1->IsBrAnalyzable)
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);		BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
// Remove duplicated instructions.		// Remove duplicated instructions.
DI1 = BBI1->BB->end();		DI1 = BBI1->BB->end();
for (unsigned i = 0; i != NumDups2; ) {		for (unsigned i = 0; i != NumDups2; ) {
// NumDups2 only counted non-dbg_value instructions, so this won't		// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.		// run off the head of the list.
assert (DI1 != BBI1->BB->begin());		assert (DI1 != BBI1->BB->begin());
--DI1;		--DI1;
// skip dbg_value instructions		// skip dbg_value instructions
if (!DI1->isDebugValue())		if (!DI1->isDebugValue())
++i;		++i;
}		}
BBI1->BB->erase(DI1, BBI1->BB->end());		BBI1->BB->erase(DI1, BBI1->BB->end());

// Kill flags in the true block for registers living into the false block		// Kill flags in the true block for registers living into the false block
// must be removed.		// must be removed.
RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);		RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);

// Remove 'false' block branch (unless it was not analyzable), and find		// Remove 'false' block branch, and find the last instruction to predicate.
// the last instruction to predicate.		// Save the debug location.
if (BBI2->IsBrAnalyzable)		if (RemoveFalseBranch)
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);		BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();		DI2 = BBI2->BB->end();
while (NumDups2 != 0) {		while (NumDups2 != 0) {
// NumDups2 only counted non-dbg_value instructions, so this won't		// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.		// run off the head of the list.
assert (DI2 != BBI2->BB->begin());		assert (DI2 != BBI2->BB->begin());
--DI2;		--DI2;
// skip dbg_value instructions		// skip dbg_value instructions
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) &&
BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T))		BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T))
--DI2;		--DI2;
}		}

// Predicate the 'false' block.		// Predicate the 'false' block.
PredicateBlock(BBI2, DI2, Cond2);		PredicateBlock(BBI2, DI2, Cond2);

// Merge the true block into the entry of the diamond.		// Merge the true block into the entry of the diamond.
MergeBlocks(BBI, *BBI1, TailBB == nullptr);		MergeBlocks(BBI, *BBI1, MergeAddEdges);
MergeBlocks(BBI, *BBI2, TailBB == nullptr);		MergeBlocks(BBI, *BBI2, MergeAddEdges);
		return true;
		}

		/// IfConvertForkedDiamond - If convert an almost-diamond sub-CFG where the true
		/// and false blocks share a common tail.
		bool IfConverter::IfConvertForkedDiamond(
		BBInfo &BBI, IfcvtKind Kind,
		unsigned NumDups1, unsigned NumDups2,
		bool TClobbersPred, bool FClobbersPred) {
		BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
		BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];

		// Save the debug location for later.
		DebugLoc dl;
		MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
		if (TIE != TrueBBI.BB->end())
		dl = TIE->getDebugLoc();
		// Removing branches from both blocks is safe, because we have already
		// determined that both blocks have the same branch instructions. The branch
		// will be added back at the end, unpredicated.
		if (!IfConvertDiamondCommon(
		BBI, TrueBBI, FalseBBI,
		NumDups1, NumDups2,
		TClobbersPred, FClobbersPred,
		/* RemoveTrueBranch */ true, /* RemoveFalseBranch */ true,
		/* MergeAddEdges */ true))
		return false;

		// Add back the branch.
		// Debug location saved above when removing the branch from BBI2
		TII->InsertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
		TrueBBI.BrCond, dl);

		RemoveExtraEdges(BBI);

		// Update block info.
		BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
		InvalidatePreds(BBI.BB);

		// FIXME: Must maintain LiveIns.
		return true;
		}

		/// IfConvertDiamond - If convert a diamond sub-CFG.
		///
		bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
		unsigned NumDups1, unsigned NumDups2) {
		BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
		BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
		MachineBasicBlock *TailBB = TrueBBI.TrueBB;

		// True block must fall through or end with an unanalyzable terminator.
		if (!TailBB) {
		if (blockAlwaysFallThrough(TrueBBI))
		TailBB = FalseBBI.TrueBB;
		assert((TailBB \|\| !TrueBBI.IsBrAnalyzable) && "Unexpected!");
		}

		if (!IfConvertDiamondCommon(
		davidxlUnsubmitted Not Done Reply Inline Actions Why is this check not done for the forked case? davidxl: Why is this check not done for the forked case?
		iterateeAuthorUnsubmitted Not Done Reply Inline Actions Good catch. iteratee: Good catch.
		BBI, TrueBBI, FalseBBI,
		NumDups1, NumDups2,
		TrueBBI.ClobbersPred, FalseBBI.ClobbersPred,
		/* RemoveTrueBranch */ TrueBBI.IsBrAnalyzable,
		/* RemoveFalseBranch */ FalseBBI.IsBrAnalyzable,
		/* MergeAddEdges */ TailBB == nullptr))
		return false;

// If the if-converted block falls through or unconditionally branches into		// If the if-converted block falls through or unconditionally branches into
// the tail block, and the tail block does not have other predecessors, then		// the tail block, and the tail block does not have other predecessors, then
// fold the tail block in as well. Otherwise, unless it falls through to the		// fold the tail block in as well. Otherwise, unless it falls through to the
// tail, add a unconditional branch to it.		// tail, add a unconditional branch to it.
if (TailBB) {		if (TailBB) {
BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];		BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
bool CanMergeTail = !TailBBI.HasFallThrough &&		bool CanMergeTail = !TailBBI.HasFallThrough &&
!TailBBI.BB->hasAddressTaken();		!TailBBI.BB->hasAddressTaken();
// The if-converted block can still have a predicated terminator		// The if-converted block can still have a predicated terminator
// (e.g. a predicated return). If that is the case, we cannot merge		// (e.g. a predicated return). If that is the case, we cannot merge
// it with the tail block.		// it with the tail block.
MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator();		MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator();
if (TI != BBI.BB->end() && TII->isPredicated(*TI))		if (TI != BBI.BB->end() && TII->isPredicated(*TI))
CanMergeTail = false;		CanMergeTail = false;
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;		// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.		// check if there are any other predecessors besides those.
unsigned NumPreds = TailBB->pred_size();		unsigned NumPreds = TailBB->pred_size();
if (NumPreds > 1)		if (NumPreds > 1)
CanMergeTail = false;		CanMergeTail = false;
else if (NumPreds == 1 && CanMergeTail) {		else if (NumPreds == 1 && CanMergeTail) {
MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();		MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
if (PI != BBI1->BB && PI != BBI2->BB)		if (PI != TrueBBI.BB && PI != FalseBBI.BB)
CanMergeTail = false;		CanMergeTail = false;
}		}
if (CanMergeTail) {		if (CanMergeTail) {
MergeBlocks(BBI, TailBBI);		MergeBlocks(BBI, TailBBI);
TailBBI.IsDone = true;		TailBBI.IsDone = true;
} else {		} else {
BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());		BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
InsertUncondBranch(BBI.BB, TailBB, TII);		InsertUncondBranch(BBI.BB, TailBB, TII);
BBI.HasFallThrough = false;		BBI.HasFallThrough = false;
}		}
}		}

// RemoveExtraEdges won't work if the block has an unanalyzable branch,		// RemoveExtraEdges won't work if the block has an unanalyzable branch,
// which can happen here if TailBB is unanalyzable and is merged, so		// which can happen here if TailBB is unanalyzable and is merged, so
// explicitly remove BBI1 and BBI2 as successors.		// explicitly remove BBI1 and BBI2 as successors.
BBI.BB->removeSuccessor(BBI1->BB);		BBI.BB->removeSuccessor(TrueBBI.BB);
BBI.BB->removeSuccessor(BBI2->BB, true);		BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
RemoveExtraEdges(BBI);		RemoveExtraEdges(BBI);

// Update block info.		// Update block info.
BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;		BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
InvalidatePreds(BBI.BB);		InvalidatePreds(BBI.BB);

// FIXME: Must maintain LiveIns.		// FIXME: Must maintain LiveIns.
return true;		return true;
▲ Show 20 Lines • Show All 256 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll

This file was added.

				; ModuleID = 'bugpoint-reduced-instructions.bc'
				; RUN: llc -O2 -o - %s \| FileCheck %s
				source_filename = "bugpoint-output-9ad75f8.bc"
				target datalayout = "e-m:e-i64:64-n32:64"
				target triple = "powerpc64le-unknown-linux-gnu"

				; Function Attrs: nounwind uwtable
				define hidden void @_ZN11__sanitizer25MaybeStartBackgroudThreadEv() local_unnamed_addr #0 {
				entry:
				br i1 undef, label %land.lhs.true, label %if.end

				; CHECK: # %land.lhs.true
				; CHECK-NEXT: bclr
				; CHECK-NEXT: # %if.end4
				land.lhs.true: ; preds = %entry
				br i1 undef, label %return, label %if.end4

				if.end: ; preds = %entry
				br i1 icmp ne (i32 (i8*, i8*, i8* (i8*)*, i8*)* @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_, i32 (i8*, i8*, i8* (i8*)*, i8*)* null), label %if.end4, label %return

				if.end4: ; preds = %if.end, %land.lhs.true
				%call5 = tail call i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)* nonnull @_ZN11__sanitizer16BackgroundThreadEPv, i8* null) #7
				unreachable

				return: ; preds = %if.end, %land.lhs.true
				ret void
				}

				declare extern_weak signext i32 @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_(i8*, i8*, i8* (i8*)*, i8*) #2

				declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*) local_unnamed_addr #2

				declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5

				attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
				attributes #7 = { nobuiltin nounwind }

test/CodeGen/Thumb2/thumb2-ifcvt1.ll

; RUN: llc < %s -mtriple=thumbv7-apple-darwin \| FileCheck %s		; RUN: llc < %s -mtriple=thumbv7-apple-darwin \| FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it \| FileCheck %s		; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it \| FileCheck %s
; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it \|FileCheck %s		; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it \| FileCheck %s
		; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 \| FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {		define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK-LABEL: t1:		; CHECK-LABEL: t1:
; CHECK: ittt ne		; CHECK: ittt ne
; CHECK: cmpne		; CHECK: cmpne
; CHECK: addne		; CHECK: addne
; CHECK: bxne lr		; CHECK: bxne lr
switch i32 %c, label %cond_next [		switch i32 %c, label %cond_next [
i32 1, label %cond_true		i32 1, label %cond_true
i32 7, label %cond_true		i32 7, label %cond_true
]		]

cond_true:		cond_true:
%tmp12 = add i32 %a, 1		%tmp12 = add i32 %a, 1
%tmp1518 = add i32 %tmp12, %b		%tmp1518 = add i32 %tmp12, %b
ret i32 %tmp1518		ret i32 %tmp1518

cond_next:		cond_next:
%tmp15 = add i32 %b, %a		%tmp15 = add i32 %b, %a
ret i32 %tmp15		ret i32 %tmp15
}		}

define i32 @t2(i32 %a, i32 %b) nounwind {		define i32 @t2(i32 %a, i32 %b) nounwind {
entry:		entry:
; CHECK-LABEL: t2:		; CHECK-LABEL: t2:
; CHECK: ite gt		; CHECK: ite {{gt\|le}}
; CHECK: subgt		; CHECK-DAG: suble
; CHECK: suble		; CHECK-DAG: subgt
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]		%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer		br i1 %tmp1434, label %bb17, label %bb.outer

bb.outer: ; preds = %cond_false, %entry		bb.outer: ; preds = %cond_false, %entry
%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]		%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]		%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
br label %bb		br label %bb

Show All 16 Lines	cond_false: ; preds = %bb
%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]		%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
br i1 %tmp14, label %bb17, label %bb.outer		br i1 %tmp14, label %bb17, label %bb.outer

bb17: ; preds = %cond_false, %cond_true, %entry		bb17: ; preds = %cond_false, %cond_true, %entry
%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]		%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
ret i32 %a_addr.026.1		ret i32 %a_addr.026.1
}		}

		define i32 @t2_nomerge(i32 %a, i32 %b) nounwind {
		entry:
		; CHECK-LABEL: t2_nomerge:
		; CHECK-NOT: ite {{gt\|le}}
		; CHECK-NOT: suble
		; CHECK-NOT: subgt
		%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
		br i1 %tmp1434, label %bb17, label %bb.outer

		bb.outer: ; preds = %cond_false, %entry
		%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
		%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
		br label %bb

		bb: ; preds = %cond_true, %bb.outer
		%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
		%tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
		%tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
		%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
		%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
		br i1 %tmp3, label %cond_true, label %cond_false

		cond_true: ; preds = %bb
		%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
		%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
		%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
		br i1 %tmp1437, label %bb17, label %bb

		cond_false: ; preds = %bb
		%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
		%tmp14 = icmp eq i32 %b_addr.021.0.ph, %tmp10 ; <i1> [#uses=1]
		br i1 %tmp14, label %bb17, label %bb.outer

		bb17: ; preds = %cond_false, %cond_true, %entry
		%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
		ret i32 %a_addr.026.1
		}

@x = external global i32* ; <i32**> [#uses=1]		@x = external global i32* ; <i32**> [#uses=1]

define void @foo(i32 %a) nounwind {		define void @foo(i32 %a) nounwind {
entry:		entry:
%tmp = load i32, i32* @x ; <i32*> [#uses=1]		%tmp = load i32, i32* @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp		store i32 %a, i32* %tmp
ret void		ret void
}		}
Show All 18 Lines