This is an archive of the discontinued LLVM Phabricator instance.

Differential D20505

Codegen: Make chains from lattice-shaped CFGs
AbandonedPublic

Authored by iteratee on May 20 2016, 6:33 PM.

Download Raw Diff

Details

Reviewers

davidxl
• tstellarAMD
haicheng

Summary

This change extends D27742 to allow a chain of triangles to
tail-duplicate and produce a lattice. The essential change is that if a
predecessor has the same successors as a layout predecessor, we ignore
that block when considering if we can tail-duplicate into unplaced
predecessors.

As an example consider the following CFG:

  B   D   F   H
 / \ / \ / \ / \
A---C---E---G---Ret

Where A,C,E,G are all small (Currently 2 instructions).

The CFG preserving layout is then A,B,C,D,E,F,G,H,Ret.

The current code will copy C into B, E into D and G into F and yield the layout
A,C,B(C),E,D(E),G,F(G),H,Ret

define void @straight_test(i32 %tag) {
entry:
  br label %test1
test1: ; A
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1
optional1: ; B
  call void @a()
  br label %test2
test2: ; C
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2
optional2: ; D
  call void @b()
  br label %test3
test3: ; E
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3
optional3: ; F
  call void @c()
  br label %test4
test4: ; G
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %exit, label %optional4
optional4: ; H
  call void @d()
  br label %exit
exit:
  ret void
}

Here is the layout after D27742:

straight_test:                          # @straight_test
; ... Prologue elided
; BB#0:                                 # %entry ; A (merged with test1)
; ... More prologue elided
        mr 30, 3
        andi. 3, 30, 1
        bc 12, 1, .LBB0_2
; BB#1:                                 # %test2 ; C
        rlwinm. 3, 30, 0, 30, 30
        beq      0, .LBB0_3
        b .LBB0_4
.LBB0_2:                                # %optional1 ; B (copy of C)
        bl a
        nop
        rlwinm. 3, 30, 0, 30, 30
        bne      0, .LBB0_4
.LBB0_3:                                # %test3 ; E
        rlwinm. 3, 30, 0, 29, 29
        beq      0, .LBB0_5
        b .LBB0_6
.LBB0_4:                                # %optional2 ; D (copy of E)
        bl b
        nop
        rlwinm. 3, 30, 0, 29, 29
        bne      0, .LBB0_6
.LBB0_5:                                # %test4 ; G
        rlwinm. 3, 30, 0, 28, 28
        beq      0, .LBB0_8
        b .LBB0_7
.LBB0_6:                                # %optional3 ; F (copy of G)
        bl c
        nop
        rlwinm. 3, 30, 0, 28, 28
        beq      0, .LBB0_8
.LBB0_7:                                # %optional4 ; H
        bl d
        nop
.LBB0_8:                                # %exit ; Ret
        ld 30, 96(1)                    # 8-byte Folded Reload
        addi 1, 1, 112
        ld 0, 16(1)
        mtlr 0
        blr

This is where the bolder strategy of this patch comes in. We allow E
to be placed, even though its predecessor B (after copying C) is
unplaced, because it is lattice shaped after tail-duplication.
This then produces the layout A,C,E,G,B,D,F,H,Ret. This layout does have
back edges, which is a negative, but it has a bigger compensating
positive, which is that it handles the case where there are long strings
of skipped blocks much better than the original layout. Both layouts
handle runs of executed blocks equally well. Branch prediction also
improves if there is any correlation between subsequent optional blocks.

Here is the resulting concrete layout:

straight_test:                          # @straight_test
; BB#0:                                 # %entry ; A (merged with test1)
        mr 30, 3
        andi. 3, 30, 1
        bc 12, 1, .LBB0_4
; BB#1:                                 # %test2 ; C
        rlwinm. 3, 30, 0, 30, 30
        bne      0, .LBB0_5
.LBB0_2:                                # %test3 ; E
        rlwinm. 3, 30, 0, 29, 29
        bne      0, .LBB0_6
.LBB0_3:                                # %test4 ; G
        rlwinm. 3, 30, 0, 28, 28
        bne      0, .LBB0_7
        b .LBB0_8
.LBB0_4:                                # %optional1 ; B (Copy of C)
        bl a
        nop
        rlwinm. 3, 30, 0, 30, 30
        beq      0, .LBB0_2
.LBB0_5:                                # %optional2 ; D (Copy of E)
        bl b
        nop
        rlwinm. 3, 30, 0, 29, 29
        beq      0, .LBB0_3
.LBB0_6:                                # %optional3 ; F (Copy of G)
        bl c
        nop
        rlwinm. 3, 30, 0, 28, 28
        beq      0, .LBB0_8
.LBB0_7:                                # %optional4 ; H
        bl d
        nop
.LBB0_8:                                # %exit

Diff Detail

Event Timeline

iteratee updated this revision to Diff 58025.May 20 2016, 6:33 PM

iteratee retitled this revision from to Codegen: Outline for chains of tail-duplicable blocks..

iteratee updated this object.

iteratee added a reviewer: haicheng.

iteratee set the repository for this revision to rL LLVM.

iteratee added subscribers: llvm-commits, chandlerc, echristo.

Herald added subscribers: dsanders, jyknight, jfb. · View Herald TranscriptMay 20 2016, 6:33 PM

iteratee added parent revisions: D20379: Codegen: Fix broken assumption in Tail Merge., D18226: Codegen: Tail-duplicate during placement..May 20 2016, 6:33 PM

sunfish added a subscriber: sunfish.May 20 2016, 7:04 PM

iteratee mentioned this in D18226: Codegen: Tail-duplicate during placement..May 23 2016, 12:42 PM

iteratee mentioned this in D20604: Codegen: Don't tail-duplicate blocks with un-analyzable fallthrough..May 24 2016, 3:39 PM

iteratee added a parent revision: D20604: Codegen: Don't tail-duplicate blocks with un-analyzable fallthrough..

iteratee removed a parent revision: D20604: Codegen: Don't tail-duplicate blocks with un-analyzable fallthrough..May 25 2016, 5:29 PM

Added fixes for AMDGPU tests, as some intervening change has enabled this optimization for that target.

Herald added a reviewer: • tstellarAMD. · View Herald TranscriptMay 25 2016, 5:32 PM

iteratee edited edge metadata.May 31 2016, 11:35 AM

iteratee added a subscriber: kbarton.

Did a quick run through for clarity. A few inline comments. Few requests to break things up. Check for coding style nits across the entire set of code and feel free to run clang format on the lines you've changed.

Thanks for the work so far!

-eric

lib/CodeGen/MachineBlockPlacement.cpp
724	Sadly I'm not sure if you're adding or deleting whitespace here. Either way feel free to do it separately.
756–757	Go ahead and commit this separately (along with the one below). Also "mismatch" and you shouldn't need the \n.
825–826	Can you document everything that's going on here more please? In particular, what's going on with the callback here and why it needs to be a callback rather than happening on the spot.
1288	Once again I can't remember if we have autobrief turned on or not...
1297	Formatting.
1312	Coding style nit: no braces around single lines.

Added a couple of comments and tidied formatting.

Formatting and comments.

iteratee added inline comments.Jun 8 2016, 2:59 PM

lib/CodeGen/MachineBlockPlacement.cpp
825–826	I've added a comment to this effect, but the reason it has to be a callback is because none of the things that occur would be valid after deleting the block. (use after free). As to the rest, the function is broken up into small chunks with a comment as to what each chunk is doing. Is there something more you'd like to see?
1288	Even if we do, it's probably better to match the existing style for this change, and clean it up in a separate patch.

Add comments about callback

davidxl added a reviewer: davidxl.Jun 13 2016, 10:40 AM

Kyle, can you update your patch and do a rebase -- there were recent restructure changes in MBP which can make the code cleaner.

iteratee mentioned this in D21674: [BranchFolding] Update UnavoidableBlocks for OutlineOptionalBranches.Jun 24 2016, 10:33 AM

Added changes to handle re-laying out code that had been tail-duplicated into the same shape. Necessary to work correctly with tail merging during layout.

Herald added a subscriber: nemanjai. · View Herald TranscriptJun 28 2016, 4:25 PM

OK, this took longer than I thought it would, because I had to come up with
a good way to interact with tail-merging during layout. Please take a look
now.

Kyle.

minor cleanups.

thanks. I don't seem to find explicit test cases added for this change. can you add one ?

Please also update the description with a real motivation example -- the original code and the pseudo code after the transformation.

davidxl added inline comments.Jun 28 2016, 5:32 PM

lib/CodeGen/MachineBlockPlacement.cpp
322	This documentation does not help in understanding the meaning. Can you add a reference to a detailed description of the algorithm elsewhere (e.g. at the function definition)?
408	Brief documentation.
411	Same here.
1118	Please outline this big part into its own method.
lib/CodeGen/TailDuplicator.cpp
787 ↗	(On Diff #62155)	Split out the refactor change.
872 ↗	(On Diff #62155)	Split out the clean-up changes.
test/CodeGen/PowerPC/tail-dup-layout.ll
1	This test case can use some simplifications. Why not just do simple function call in optional branches? The test block can also be simplified for instance testing input parameters.

iteratee mentioned this in rL278288: Codegen: Don't tail-duplicate blocks with un-analyzable fallthrough..Aug 10 2016, 2:11 PM

Lots of updates. Mainly pulled some of the changes into D18226 and expanded the commit message.

Herald added a subscriber: mzolotukhin. · View Herald TranscriptAug 11 2016, 3:29 PM

iteratee updated this object.Aug 11 2016, 3:35 PM

Minor fix.

Add brief comments to method declarations.

include/llvm/Analysis/LoopInfoImpl.h
188–194	I should probably split this out.
lib/CodeGen/MachineBlockPlacement.cpp
1118	This was pulled into D18226 and placed in its own method there.
lib/CodeGen/TailDuplicator.cpp
872 ↗	(On Diff #62155)	I think you mean the line below. I'll split that out. The line above isn't clean up.

iteratee mentioned this in rL278866: Codegen: Don't tail-duplicate blocks with un-analyzable fallthrough..Aug 16 2016, 4:04 PM

Simple rebase.

junbuml added a subscriber: junbuml.Aug 31 2016, 11:17 AM

iteratee removed parent revisions: D18226: Codegen: Tail-duplicate during placement., D20379: Codegen: Fix broken assumption in Tail Merge..Nov 1 2016, 4:39 PM

There are two independent problems that this patch tries to address.

Enable tail duplication for cases when current layout prefers topological order
Handling a sequence of tail-duplicatable blocks.

Please split out 1) and 2) into two different patches.

For patch 1), I don't think it is the right approach to piggyback the implementation on the outline heuristics; please split it out. Ideally, the fix should simply be to add one new heuristic that is checked before the hasBetterLayoutSuccessor check, or to preserve top order only when tail dup is not good:

if (hasBetterLayoutSuccessor(... ) ) {
         if (!IsTailDupCandidate(Succ)) {
               continue;
         }
 }

This is now MUCH shorter.

I realized with some help from davidxl that I didn't need to tie this to the outlining.

Also, because we need to recognize the pattern that occurs from repeated tail-duplication (So that when we repeat layout, we get the same result), we just recognize the pattern instead of using the delay set, as it's redundant.

I need to rewrite the description, but the code should be much easier to review now. The change to the placement algorithm is now 22 lines, and most of that is a utility function for CFG matching.

Herald edited edge metadata. · View Herald TranscriptDec 13 2016, 5:17 PM

Herald added subscribers: nhaehnle, wdng. · View Herald Transcript

davidxl added inline comments.Dec 14 2016, 3:33 PM

lib/CodeGen/MachineBlockPlacement.cpp
572	Add a documentation line to this method.
572	Add more explanation here (as a comment), possibly with a simple example?

arsenm added a subscriber: arsenm.Dec 15 2016, 1:03 PM

arsenm added inline comments.

test/CodeGen/AMDGPU/convergent-inlineasm.ll
34	Unnecessary whitespace change

iteratee mentioned this in D27742: CodeGen: Allow small copyable blocks to "break" the CFG..Dec 21 2016, 4:28 PM

Rebase and re-write description

Herald edited edge metadata. · View Herald TranscriptJan 6 2017, 5:11 PM

iteratee retitled this revision from Codegen: Outline for chains of tail-duplicable blocks. to Codegen: Make chains from lattice-shaped CFGs.Jan 6 2017, 5:15 PM

iteratee updated this object.

iteratee edited edge metadata.

Add comments as requested

Herald edited edge metadata. · View Herald TranscriptJan 9 2017, 4:04 PM

More comments.

Herald edited edge metadata. · View Herald TranscriptJan 9 2017, 4:32 PM

What is the base revision of this patch?

Since the patch has been rewritten, is it possible to create a new patch (after D27742 lands) and abandon this one? A clean restart can simplify things a lot.

In D20505#641524, @davidxl wrote:

What is the base revision of this patch?

The base is D27742

Since the patch has been rewritten, is it possible to create a new patch (after D27742 lands) and abandon this one? A clean restart can simplify things a lot.

I don't really want to submit D27742 without this patch. I created a new patch as you requested and marked it as a child of D27742.

Please see https://reviews.llvm.org/D28522

Revision Contents

Path

Size

include/

llvm/

Analysis/

LoopInfoImpl.h

9 lines

lib/

CodeGen/

MachineBlockPlacement.cpp

241 lines

test/

CodeGen/

AArch64/

aarch64-dynamic-stack-layout.ll

12 lines

arm64-atomic.ll

22 lines

arm64-ccmp.ll

4 lines

arm64-shrink-wrapping.ll

5 lines

branch-relax-bcc.ll

14 lines

combine-comparisons-by-cse.ll

4 lines

fcmp.ll

4 lines

rm_redundant_cmp.ll

8 lines

tbz-tbnz.ll

38 lines

AMDGPU/

cf-loop-on-constant.ll

4 lines

convergent-inlineasm.ll

7 lines

5 lines

8 lines

10 lines

5 lines

30 lines

5 lines

8 lines

ARM/

2013-05-05-IfConvertBug.ll

8 lines

arm-shrink-wrapping.ll

21 lines

4 lines

4 lines

35 lines

2 lines

2 lines

Mips/

llvm-ir/

32 lines

28 lines

28 lines

11 lines

PowerPC/

bdzlr.ll

6 lines

tail-dup-layout.ll

126 lines

SPARC/

sjlj.ll

9 lines

SystemZ/

tdc-06.ll

14 lines

Thumb/

thumb-shrink-wrapping.ll

11 lines

Thumb2/

cbnz.ll

2 lines

ifcvt-compare.ll

2 lines

WebAssembly/

mem-intrinsics.ll

2 lines

X86/

2012-08-17-legalizer-crash.ll

2 lines

7 lines

5 lines

34 lines

4 lines

critical-edge-split-2.ll

5 lines

shrink-wrap-chkstk.ll

5 lines

twoaddr-coalesce-3.ll

4 lines

x86-shrink-wrap-unwind.ll

24 lines

x86-shrink-wrapping.ll

18 lines

Diff 67760

include/llvm/Analysis/LoopInfoImpl.h

	Show First 20 Lines • Show All 179 Lines • ▼ Show 20 Lines
	/// information. NewBB is set to be a new member of the current loop.			/// information. NewBB is set to be a new member of the current loop.
	/// Because of this, it is added as a member of all parent loops, and is added			/// Because of this, it is added as a member of all parent loops, and is added
	/// to the specified LoopInfo object as being in the current basic block. It			/// to the specified LoopInfo object as being in the current basic block. It
	/// is not valid to replace the loop header with this method.			/// is not valid to replace the loop header with this method.
	///			///
	template<class BlockT, class LoopT>			template<class BlockT, class LoopT>
	void LoopBase<BlockT, LoopT>::			void LoopBase<BlockT, LoopT>::
	addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {			addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {
	assert((Blocks.empty() \|\| LIB[getHeader()] == this) &&			#ifndef NDEBUG
	"Incorrect LI specified for this loop!");			if (!Blocks.empty()) {
				auto SameHeader = LIB[getHeader()];
				assert(contains(SameHeader) && getHeader() == SameHeader->getHeader()
				&& "Incorrect LI specified for this loop!");
				}
				#endif
				iterateeAuthorUnsubmitted Not Done Reply Inline Actions I should probably split this out. iteratee: I should probably split this out.
	assert(NewBB && "Cannot add a null basic block to the loop!");			assert(NewBB && "Cannot add a null basic block to the loop!");
	assert(!LIB[NewBB] && "BasicBlock already in the loop!");			assert(!LIB[NewBB] && "BasicBlock already in the loop!");

	LoopT L = static_cast<LoopT >(this);			LoopT L = static_cast<LoopT >(this);

	// Add the loop mapping to the LoopInfo object...			// Add the loop mapping to the LoopInfo object...
	LIB.BBMap[NewBB] = L;			LIB.BBMap[NewBB] = L;

	▲ Show 20 Lines • Show All 343 Lines • Show Last 20 Lines

lib/CodeGen/MachineBlockPlacement.cpp

Show First 20 Lines • Show All 300 Lines • ▼ Show 20 Lines	class MachineBlockPlacement : public MachineFunctionPass {

/// \brief Duplicator used to duplicate tails during placement.		/// \brief Duplicator used to duplicate tails during placement.
///		///
/// Placement decisions can open up new tail duplication opportunities, but		/// Placement decisions can open up new tail duplication opportunities, but
/// since tail duplication affects placement decisions of later blocks, it		/// since tail duplication affects placement decisions of later blocks, it
/// must be done inline.		/// must be done inline.
TailDuplicator TailDup;		TailDuplicator TailDup;

/// \brief A set of blocks that are unavoidably execute, i.e. they dominate		/// \brief A set of blocks that are unavoidably executed.
/// all terminators of the MachineFunction.		///
		/// i.e. they dominate
		/// all terminators of the MachineFunction. Also used within loops for blocks
		/// that are unavoidable within the loop.
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;		SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;

		/// \brief A set of delayed blocks for tail-duplication.
		///
		/// These blocks form a second spine through a loop/function, and so
		/// predecessors within this set do not need to be able to placed.
		/// This allows the tail-duplicated spine (or similar cfg) to grow beyond
		/// 2 blocks.
		SmallPtrSet<MachineBasicBlock *, 8> TailDupDelayBlocks;
		davidxlUnsubmitted Done Reply Inline Actions This document does not help understand the meaning. Can add a reference to detailed description of the algoirthm in other place (e.g. function definition). davidxl: This document does not help understand the meaning. Can add a reference to detailed description…

/// \brief Allocator and owner of BlockChain structures.		/// \brief Allocator and owner of BlockChain structures.
///		///
/// We build BlockChains lazily while processing the loop structure of		/// We build BlockChains lazily while processing the loop structure of
/// a function. To reduce malloc traffic, we allocate them using this		/// a function. To reduce malloc traffic, we allocate them using this
/// slab-like allocator, and destroy them after the pass completes. An		/// slab-like allocator, and destroy them after the pass completes. An
/// important guarantee is that this allocator produces stable pointers to		/// important guarantee is that this allocator produces stable pointers to
/// the chains.		/// the chains.
SpecificBumpPtrAllocator<BlockChain> ChainAllocator;		SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	class MachineBlockPlacement : public MachineFunctionPass {
MachineBasicBlock *findBestLoopExit(MachineLoop &L,		MachineBasicBlock *findBestLoopExit(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);		const BlockFilterSet &LoopBlockSet);
BlockFilterSet collectLoopBlockSet(MachineLoop &L);		BlockFilterSet collectLoopBlockSet(MachineLoop &L);
void buildLoopChains(MachineLoop &L);		void buildLoopChains(MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,		void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet);		const BlockFilterSet &LoopBlockSet);
void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,		void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);		const BlockFilterSet &LoopBlockSet);
void collectMustExecuteBBs();
void buildCFGChains();		void buildCFGChains();
void optimizeBranches();		void optimizeBranches();
void alignBlocks();		void alignBlocks();
		void computeLoopUnavoidableBlocks(MachineLoop &L);
		void computeUnavoidableBlocks();
		bool canTailDuplicateAllPreds(MachineBasicBlock BB, MachineBasicBlock Succ,
		davidxlUnsubmitted Done Reply Inline Actions Brief documentation. davidxl: Brief documentation.
		BlockChain &Chain,
		const BlockFilterSet *BlockFilter);
		void delayTailDuplicatedBlocks(MachineBasicBlock BB, MachineBasicBlock Succ,
		davidxlUnsubmitted Done Reply Inline Actions Same here. davidxl: Same here.
		BlockChain &Chain,
		const BlockFilterSet *BlockFilter);

public:		public:
static char ID; // Pass identification, replacement for typeid		static char ID; // Pass identification, replacement for typeid
MachineBlockPlacement() : MachineFunctionPass(ID) {		MachineBlockPlacement() : MachineFunctionPass(ID) {
initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());		initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
}		}

bool runOnMachineFunction(MachineFunction &F) override;		bool runOnMachineFunction(MachineFunction &F) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines	getAdjustedProbability(BranchProbability OrigProb,
if (SuccProbN >= SuccProbD)		if (SuccProbN >= SuccProbD)
SuccProb = BranchProbability::getOne();		SuccProb = BranchProbability::getOne();
else		else
SuccProb = BranchProbability(SuccProbN, SuccProbD);		SuccProb = BranchProbability(SuccProbN, SuccProbD);

return SuccProb;		return SuccProb;
}		}

		static bool hasSameSuccessors(
		davidxlUnsubmitted Done Reply Inline Actions Add a documentation line to this method. davidxl: Add a documentation line to this method.
		davidxlUnsubmitted Done Reply Inline Actions Add more explanation here (as comment) and possible with a simple example? davidxl: Add more explanation here (as comment) and possible with a simple example?
		MachineBasicBlock &BB, SmallPtrSetImpl<MachineBasicBlock *> &Successors) {
		if (BB.succ_size() != Successors.size())
		return false;
		// We don't want to count self-loops
		if (Successors.count(&BB))
		return false;
		for (MachineBasicBlock *Succ : BB.successors())
		if (!Successors.count(Succ))
		return false;
		return true;
		}

		/// When the option TailDupPlacement is on, this method checks if the
		/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
		/// into all of its unplaced, unfiltered predecessors, that are not BB. In
		/// addition we keep a set of blocks that have been tail-duplicated into and
		/// allow those blocks to be unplaced as well. This allows the creation of a
		/// second (larger) spine and a short fallthrough spine.
		/// We also identify blocks with the CFG that would have been produced by
		/// tail-duplication and lay them out in the same manner.
		bool MachineBlockPlacement::canTailDuplicateAllPreds(
		MachineBasicBlock BB, MachineBasicBlock Succ, BlockChain &Chain,
		const BlockFilterSet *BlockFilter) {
		DEBUG(dbgs() << "Checking to see if block " << getBlockName(Succ)
		<< " can tail duplicate into all its predecessors.\n");
		bool IsSimple = TailDup.isSimpleBB(Succ);

		if (!TailDup.shouldTailDuplicate(Succ->getParent(), IsSimple, Succ)) {
		DEBUG(dbgs() << "Skipping because it is "
		<< "not a candidate for duplication.\n");
		return false;
		}
		// For CFG checking.
		SmallPtrSet<MachineBasicBlock *, 4> Successors(BB->succ_begin(), BB->succ_end());
		for (MachineBasicBlock *Pred : Succ->predecessors()) {
		// Make sure all unplaced and unfiltered predecessors are either part
		// of the second spine, or can be tail-duplicated into.
		if (Pred == BB \|\| (BlockFilter && !BlockFilter->count(Pred))
		\|\| BlockToChain[Pred] == &Chain)
		continue;
		// If Pred is part of the growing second spine, we don't need to be
		// able to copy succ onto the end of it.
		if (TailDupDelayBlocks.count(Pred) > 0)
		continue;
		if (!TailDup.canTailDuplicate(Succ, Pred)) {
		DEBUG(dbgs() << "Possibly skipping because it can't be duplicated into block "
		<< getBlockName(Pred) << ".\n");
		// Check for #Successors > 1 to make sure we aren't just outlining in the
		// triangle case.
		if (Successors.size() > 1
		&& hasSameSuccessors(*Pred, Successors)) {
		DEBUG(dbgs() << "Not skipping because it looks like a tail-duplicated block.\n");
		continue;
		} else {
		DEBUG(dbgs() << "Skipping because it can't be duplicated into block "
		<< getBlockName(Pred) << ".\n");
		}
		return false;
		}
		}
		return true;
		}

		/// Add all un-filtered unplaced blocks that will be duplicated into to the
		/// delay set.
		void MachineBlockPlacement::delayTailDuplicatedBlocks(
		MachineBasicBlock BB, MachineBasicBlock Succ, BlockChain &Chain,
		const BlockFilterSet *BlockFilter) {
		for (MachineBasicBlock *Pred : Succ->predecessors()) {
		if (Pred == BB \|\| (BlockFilter && !BlockFilter->count(Pred))
		\|\| BlockToChain[Pred] == &Chain
		\|\| TailDupDelayBlocks.count(Pred) > 0)
		continue;
		DEBUG(dbgs() << "Delaying block: " << getBlockName(Pred) << ".\n");
		TailDupDelayBlocks.insert(Pred);
		}
		}

/// When the option OutlineOptionalBranches is on, this method		/// When the option OutlineOptionalBranches is on, this method
/// checks if the fallthrough candidate block \p Succ (of block		/// checks if the fallthrough candidate block \p Succ (of block
/// \p BB) also has other unscheduled predecessor blocks which		/// \p BB) also has other unscheduled predecessor blocks which
/// are also successors of \p BB (forming triangular shape CFG).		/// are also successors of \p BB (forming triangular shape CFG).
/// If none of such predecessors are small, it returns true.		/// If none of such predecessors are small, it returns true.
/// The caller can choose to select \p Succ as the layout successors		/// The caller can choose to select \p Succ as the layout successors
/// so that \p Succ's predecessors (optional branches) can be		/// so that \p Succ's predecessors (optional branches) can be
/// outlined.		/// outlined.
/// FIXME: fold this with more general layout cost analysis.		/// FIXME: fold this with more general layout cost analysis.
bool MachineBlockPlacement::shouldPredBlockBeOutlined(		bool MachineBlockPlacement::shouldPredBlockBeOutlined(
MachineBasicBlock BB, MachineBasicBlock Succ, BlockChain &Chain,		MachineBasicBlock BB, MachineBasicBlock Succ, BlockChain &Chain,
const BlockFilterSet *BlockFilter, BranchProbability SuccProb,		const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
BranchProbability HotProb) {		BranchProbability HotProb) {
if (!OutlineOptionalBranches)		if (!OutlineOptionalBranches && !TailDupPlacement)
return false;		return false;
// If we outline optional branches, look whether Succ is unavoidable, i.e.		// If we outline optional branches, look whether Succ is unavoidable, i.e.
// dominates all terminators of the MachineFunction. If it does, other		// dominates all terminators of the MachineFunction. If it does, other
// successors must be optional. Don't do this for cold branches.		// successors must be optional. Don't do this for cold branches.
if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {		if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
		bool TailDupDelay;
		if (OutlineOptionalBranches)
		TailDupDelay = false;
		else if (TailDupPlacement
		&& canTailDuplicateAllPreds(BB, Succ, Chain, BlockFilter))
		TailDupDelay = true;
		else
		return false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {		for (MachineBasicBlock *Pred : Succ->predecessors()) {
// Check whether there is an unplaced optional branch.		// Check whether there is an unplaced optional branch.
if (Pred == Succ \|\| (BlockFilter && !BlockFilter->count(Pred)) \|\|		if (Pred == Succ \|\| (BlockFilter && !BlockFilter->count(Pred)) \|\|
BlockToChain[Pred] == &Chain)		BlockToChain[Pred] == &Chain)
continue;		continue;
// Check whether the optional branch has exactly one BB.		// Check whether the optional branch has exactly one BB.
if (Pred->pred_size() > 1 \|\| *Pred->pred_begin() != BB)		if (Pred->pred_size() > 1 \|\| *Pred->pred_begin() != BB)
continue;		continue;
// Check whether the optional branch is small.		// Check whether the optional branch is small.
if (Pred->size() < OutlineOptionalThreshold)		if (Pred->size() < OutlineOptionalThreshold)
return false;		return false;
}		}
		if (TailDupDelay)
		delayTailDuplicatedBlocks(BB, Succ, Chain, BlockFilter);
return true;		return true;
} else		} else
return false;		return false;
}		}

// When profile is not present, return the StaticLikelyProb.		// When profile is not present, return the StaticLikelyProb.
// When profile is available, we need to handle the triangle-shape CFG.		// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(		static BranchProbability getLayoutSuccessorProbThreshold(
Show All 16 Lines	if (Succ1->isSuccessor(Succ2) \|\| Succ2->isSuccessor(Succ1)) {
*/		*/
return BranchProbability(2 * ProfileLikelyProb, 150);		return BranchProbability(2 * ProfileLikelyProb, 150);
}		}
}		}
return BranchProbability(ProfileLikelyProb, 100);		return BranchProbability(ProfileLikelyProb, 100);
}		}

/// Checks to see if the layout candidate block \p Succ has a better layout		/// Checks to see if the layout candidate block \p Succ has a better layout
/// predecessor than \c BB. If yes, returns true.		/// predecessor than \c BB. If yes, returns true.
		echristoUnsubmitted Done Reply Inline Actions Sadly I'm not sure if you're adding or deleting whitespace here. Either way feel free to do it separately. echristo: Sadly I'm not sure if you're adding or deleting whitespace here. Either way feel free to do it…
bool MachineBlockPlacement::hasBetterLayoutPredecessor(		bool MachineBlockPlacement::hasBetterLayoutPredecessor(
MachineBasicBlock BB, MachineBasicBlock Succ, BlockChain &SuccChain,		MachineBasicBlock BB, MachineBasicBlock Succ, BlockChain &SuccChain,
BranchProbability SuccProb, BranchProbability RealSuccProb,		BranchProbability SuccProb, BranchProbability RealSuccProb,
BlockChain &Chain, const BlockFilterSet *BlockFilter) {		BlockChain &Chain, const BlockFilterSet *BlockFilter) {

// There isn't a better layout when there are no unscheduled predecessors.		// There isn't a better layout when there are no unscheduled predecessors.
if (SuccChain.UnscheduledPredecessors == 0)		if (SuccChain.UnscheduledPredecessors == 0)
return false;		return false;
Show All 15 Lines	bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// branch taken from BB to Pred, plus the cost of back taken branch		// branch taken from BB to Pred, plus the cost of back taken branch
// from Pred to Succ, as well as the additional cost associated		// from Pred to Succ, as well as the additional cost associated
// with the needed unconditional jump instruction from Pred To Succ.		// with the needed unconditional jump instruction from Pred To Succ.

// The cost of the topological order layout is the taken branch cost		// The cost of the topological order layout is the taken branch cost
// from BB to Succ, so to make BB->Succ a viable candidate, the following		// from BB to Succ, so to make BB->Succ a viable candidate, the following
// must hold:		// must hold:
// 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost		// 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
// < freq(BB->Succ) * taken_branch_cost.		// < freq(BB->Succ) * taken_branch_cost.
// Ignoring unconditional jump cost, we get		// Ignoring unconditional jump cost, we get
		echristoUnsubmitted Done Reply Inline Actions Go ahead and commit this separately (along with the one below). Also "mismatch" and you shouldn't need the \n. echristo: Go ahead and commit this separately (along with the one below). Also "mismatch" and you…
// freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,		// freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
// prob(BB->Succ) > 2 * prob(BB->Pred)		// prob(BB->Succ) > 2 * prob(BB->Pred)
//		//
// When real profile data is available, we can precisely compute the		// When real profile data is available, we can precisely compute the
// probability threshold that is needed for edge BB->Succ to be considered.		// probability threshold that is needed for edge BB->Succ to be considered.
// Without profile data, the heuristic requires the branch bias to be		// Without profile data, the heuristic requires the branch bias to be
// a lot larger to make sure the signal is very strong (e.g. 80% default).		// a lot larger to make sure the signal is very strong (e.g. 80% default).
// -----------------------------------------------------------------		// -----------------------------------------------------------------
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// \| / \ \|		// \| / \ \|
// \| / \ \|		// \| / \ \|
// S1 S2		// S1 S2
//		//
// The current block is BB and edge BB->S1 is now being evaluated.		// The current block is BB and edge BB->S1 is now being evaluated.
// As above S->BB was already selected because		// As above S->BB was already selected because
// prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).		// prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
//		//
// topo-order:		// topo-order:
//		//
		echristoUnsubmitted Done Reply Inline Actions Can you document everything that's going on here more please? In particular, what's going on with the callback here and why it needs to be a callback rather than happening on the spot. echristo: Can you document everything that's going on here more please? In particular, what's going on…
		iterateeAuthorUnsubmitted Done Reply Inline Actions I've added a comment to this effect, but the reason it has to be a callback is because none of the things that occur would be valid after deleting the block. (use after free). As to the rest, the function is broken up into small chunks with a comment as to what each chunk is doing. Is there something more you'd like to see? iteratee: I've added a comment to this effect, but the reason it has to be a callback is because none of…
// S-------\| ---S		// S-------\| ---S
// \| \| \| \|		// \| \| \| \|
// ---BB \| \| BB		// ---BB \| \| BB
// \| \| \| \|		// \| \| \| \|
// \| Pred----\| \| S1----		// \| Pred----\| \| S1----
// \| \| \| \|		// \| \| \| \|
// --(S1 or S2) ---Pred--		// --(S1 or S2) ---Pred--
//		//
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines	auto AdjustedSumProb =
collectViableSuccessors(BB, Chain, BlockFilter, Successors);		collectViableSuccessors(BB, Chain, BlockFilter, Successors);

DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");		DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
for (MachineBasicBlock *Succ : Successors) {		for (MachineBasicBlock *Succ : Successors) {
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);		auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BranchProbability SuccProb =		BranchProbability SuccProb =
getAdjustedProbability(RealSuccProb, AdjustedSumProb);		getAdjustedProbability(RealSuccProb, AdjustedSumProb);

// This heuristic is off by default.		// Full outlining is off by default.
		// Tail-duplication during layout, and outlining of blocks that are
		// tail-duplicated into, are on by default.
if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,		if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
HotProb))		HotProb))
return Succ;		return Succ;

BlockChain &SuccChain = *BlockToChain[Succ];		BlockChain &SuccChain = *BlockToChain[Succ];
// Skip the edge \c BB->Succ if block \c Succ has a better layout		// Skip the edge \c BB->Succ if block \c Succ has a better layout
// predecessor that yields lower global cost.		// predecessor that yields lower global cost.
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,		if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
▲ Show 20 Lines • Show All 181 Lines • ▼ Show 20 Lines	if (!BestSucc) {
break;		break;

DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "		DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
"layout successor until the CFG reduces\n");		"layout successor until the CFG reduces\n");
}		}

// Placement may have changed tail duplication opportunities.		// Placement may have changed tail duplication opportunities.
// Check for that now.		// Check for that now.
if (TailDupPlacement && BestSucc) {		if (TailDupPlacement && BestSucc) {
		davidxlUnsubmitted Done Reply Inline Actions Please outline this big part into its own method. davidxl: Please outline this big part into its own method.
		iterateeAuthorUnsubmitted Done Reply Inline Actions This was pulled into D18226 and placed in its own method there. iteratee: This was pulled into D18226 and placed in its own method there.

bool Removed, DuplicatedToBB;		bool Removed, DuplicatedToBB;
maybeTailDuplicateBlock(BestSucc, BB, Chain, BlockFilter,		maybeTailDuplicateBlock(BestSucc, BB, Chain, BlockFilter,
PrevUnplacedBlockIt,		PrevUnplacedBlockIt,
Removed, DuplicatedToBB);		Removed, DuplicatedToBB);
if (Removed) {		if (Removed) {
if (DuplicatedToBB) {		if (DuplicatedToBB) {
// Do 2 things: If we duplicated into BB, we need to update		// Do 2 things: If we duplicated into BB, we need to update
Show All 23 Lines	if (TailDupPlacement && BestSucc) {
// don't bother laying it out, just go round the loop again with BB as		// don't bother laying it out, just go round the loop again with BB as
// the chain end.		// the chain end.
continue;		continue;
}		}
}		}

// Place this block, updating the datastructures to reflect its placement.		// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];		BlockChain &SuccChain = *BlockToChain[BestSucc];
		TailDupDelayBlocks.erase(BestSucc);
// Zero out UnscheduledPredecessors for the successor we're about to merge in case		// Zero out UnscheduledPredecessors for the successor we're about to merge in case
// we selected a successor that didn't fit naturally into the CFG.		// we selected a successor that didn't fit naturally into the CFG.
SuccChain.UnscheduledPredecessors = 0;		SuccChain.UnscheduledPredecessors = 0;
DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "		DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
<< getBlockName(BestSucc) << "\n");		<< getBlockName(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);		markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);

Chain.merge(BestSucc, &SuccChain);		Chain.merge(BestSucc, &SuccChain);
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines	for (MachineBasicBlock *Succ : MBB->successors()) {
continue;		continue;
if (Succ == MBB)		if (Succ == MBB)
continue;		continue;
BlockChain &SuccChain = *BlockToChain[Succ];		BlockChain &SuccChain = *BlockToChain[Succ];
// Don't split chains, either this chain or the successor's chain.		// Don't split chains, either this chain or the successor's chain.
if (&Chain == &SuccChain) {		if (&Chain == &SuccChain) {
DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "		DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
<< getBlockName(Succ) << " (chain conflict)\n");		<< getBlockName(Succ) << " (chain conflict)\n");
continue;		continue;
		echristoUnsubmitted Done Reply Inline Actions Once again I can't remember if we have autobrief turned on or not... echristo: Once again I can't remember if we have autobrief turned on or not...
		iterateeAuthorUnsubmitted Done Reply Inline Actions Even if we do, it's probably better to match the existing style for this change, and clean it up in a separate patch. iteratee: Even if we do, it's probably better to match the existing style for this change, and clean it…
}		}

auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);		auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
if (LoopBlockSet.count(Succ)) {		if (LoopBlockSet.count(Succ)) {
DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "		DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
<< getBlockName(Succ) << " (" << SuccProb << ")\n");		<< getBlockName(Succ) << " (" << SuccProb << ")\n");
HasLoopingSucc = true;		HasLoopingSucc = true;
continue;		continue;
}		}
		echristoUnsubmitted Done Reply Inline Actions Formatting. echristo: Formatting.

unsigned SuccLoopDepth = 0;		unsigned SuccLoopDepth = 0;
if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {		if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
SuccLoopDepth = ExitLoop->getLoopDepth();		SuccLoopDepth = ExitLoop->getLoopDepth();
if (ExitLoop->contains(&L))		if (ExitLoop->contains(&L))
BlocksExitingToOuterLoop.insert(MBB);		BlocksExitingToOuterLoop.insert(MBB);
}		}

BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;		BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "		DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
<< getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";		<< getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");		MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
// Note that we bias this toward an existing layout successor to retain		// Note that we bias this toward an existing layout successor to retain
// incoming order in the absence of better information. The exit must have		// incoming order in the absence of better information. The exit must have
// a frequency higher than the current exit before we consider breaking		// a frequency higher than the current exit before we consider breaking
		echristoUnsubmitted Done Reply Inline Actions Coding style nit: no braces around single lines. echristo: Coding style nit: no braces around single lines.
// the layout.		// the layout.
BranchProbability Bias(100 - ExitBlockBias, 100);		BranchProbability Bias(100 - ExitBlockBias, 100);
if (!ExitingBB \|\| SuccLoopDepth > BestExitLoopDepth \|\|		if (!ExitingBB \|\| SuccLoopDepth > BestExitLoopDepth \|\|
ExitEdgeFreq > BestExitEdgeFreq \|\|		ExitEdgeFreq > BestExitEdgeFreq \|\|
(MBB->isLayoutSuccessor(Succ) &&		(MBB->isLayoutSuccessor(Succ) &&
!(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {		!(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
BestExitEdgeFreq = ExitEdgeFreq;		BestExitEdgeFreq = ExitEdgeFreq;
ExitingBB = MBB;		ExitingBB = MBB;
▲ Show 20 Lines • Show All 249 Lines • ▼ Show 20 Lines	for (MachineBasicBlock *LoopBB : L.getBlocks()) {
LoopBlockSet.insert(LoopBB);		LoopBlockSet.insert(LoopBB);
}		}
} else		} else
LoopBlockSet.insert(L.block_begin(), L.block_end());		LoopBlockSet.insert(L.block_begin(), L.block_end());

return LoopBlockSet;		return LoopBlockSet;
}		}


		/// \brief Finds unavoidable blocks within a loop.
		///
		/// These blocks form the loop spine, and knowing which blocks they are allows
		/// the loop-optional blocks to be outlined to the end of the loop,
		/// unconditionally or if they can form a second tail-duped spine.
		///
		/// UnavoidableBlocks is cleared and then refilled with every block of \p L
		/// that dominates the nearest common dominator of all of L's latches and of
		/// all of its exiting blocks other than the loop header.
		void MachineBlockPlacement::computeLoopUnavoidableBlocks(MachineLoop &L) {
		  MachineBasicBlock *CommonDom = nullptr;
		  // Folds one more block into the running nearest common dominator; the
		  // first block seen simply becomes the initial candidate.
		  auto FoldIn = [&](MachineBasicBlock *MBB) {
		    CommonDom =
		        CommonDom ? MDT->findNearestCommonDominator(CommonDom, MBB) : MBB;
		  };
		  // Start with the nearest common dominator of all of L's latches.
		  SmallVector<MachineBasicBlock *, 4> Latches;
		  L.getLoopLatches(Latches);
		  for (MachineBasicBlock *Latch : Latches) {
		    DEBUG(dbgs() << "Block: " << getBlockName(Latch)
		                 << " is a latch.\n");
		    FoldIn(Latch);
		  }
		  // Then fold in every exiting block except the header. The header is
		  // still reported in the debug output before it is skipped.
		  SmallVector<MachineBasicBlock *, 4> Exiting;
		  L.getExitingBlocks(Exiting);
		  for (MachineBasicBlock *ExitingBB : Exiting) {
		    DEBUG(dbgs() << "Block: " << getBlockName(ExitingBB)
		                 << " is a loop exit.\n");
		    if (ExitingBB != L.getHeader())
		      FoldIn(ExitingBB);
		  }
		  // MBBs dominating this common dominator are unavoidable.
		  UnavoidableBlocks.clear();
		  for (MachineBasicBlock *LoopBB : L.getBlocks()) {
		    if (!MDT->dominates(LoopBB, CommonDom))
		      continue;
		    DEBUG(dbgs() << "Block: " << getBlockName(LoopBB)
		                 << " is loop un-avoidable.\n");
		    UnavoidableBlocks.insert(LoopBB);
		  }
		}


		/// \brief Finds unavoidable blocks for the entire function
		///
		/// These blocks form the spine, and knowing which blocks they are allow
		/// the optional blocks to be outlined to the end of the function
		/// unconditionally or if they can form a second tail-duped spine.
		void MachineBlockPlacement::computeUnavoidableBlocks() {
		MachineBasicBlock * Terminator = nullptr;
		for (MachineBasicBlock &MBB : *F) {
		if (MBB.succ_size() == 0) {
		if (Terminator == nullptr)
		Terminator = &MBB;
		else
		Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
		}
		}

		// MBBs dominating this common dominator are unavoidable.
		UnavoidableBlocks.clear();
		// If there are no exit blocks from the function, punt and assume that there
		// are no unavoidable blocks. This will result in a linear layout.
		if (Terminator == nullptr)
		return;
		for (MachineBasicBlock &MBB : *F)
		if (MDT->dominates(&MBB, Terminator)) {
		DEBUG(dbgs() << "Block: " << getBlockName(&MBB)
		<< " is un-avoidable.\n");
		UnavoidableBlocks.insert(&MBB);
		}
		}

/// \brief Forms basic block chains from the natural loop structures.		/// \brief Forms basic block chains from the natural loop structures.
///		///
/// These chains are designed to preserve the existing structure of the code		/// These chains are designed to preserve the existing structure of the code
/// as much as possible. We can then stitch the chains together in a way which		/// as much as possible. We can then stitch the chains together in a way which
/// both preserves the topological structure and minimizes taken conditional		/// both preserves the topological structure and minimizes taken conditional
/// branches.		/// branches.
void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {		void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner		// First recurse through any nested loops, building chains for those inner
// loops.		// loops.
for (MachineLoop *InnerLoop : L)		for (MachineLoop *InnerLoop : L)
buildLoopChains(*InnerLoop);		buildLoopChains(*InnerLoop);

assert(BlockWorkList.empty());		assert(BlockWorkList.empty());
assert(EHPadWorkList.empty());		assert(EHPadWorkList.empty());
BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);		BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);

		// Find the unavoidable blocks within this loop. This allows partial outlining
		// with tail duplication within a loop.
		if (TailDupPlacement) {
		computeLoopUnavoidableBlocks(L);
		TailDupDelayBlocks.clear();
		}

// Check if we have profile data for this function. If yes, we will rotate		// Check if we have profile data for this function. If yes, we will rotate
// this loop by modeling costs more precisely which requires the profile data		// this loop by modeling costs more precisely which requires the profile data
// for better layout.		// for better layout.
bool RotateLoopWithProfile =		bool RotateLoopWithProfile =
ForcePreciseRotationCost \|\|		ForcePreciseRotationCost \|\|
(PreciseRotationCost && F->getFunction()->getEntryCount());		(PreciseRotationCost && F->getFunction()->getEntryCount());

// First check to see if there is an obviously preferable top block for the		// First check to see if there is an obviously preferable top block for the
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines	DEBUG({
}		}
assert(!BadLoop && "Detected problems with the placement of this loop.");		assert(!BadLoop && "Detected problems with the placement of this loop.");
});		});

BlockWorkList.clear();		BlockWorkList.clear();
EHPadWorkList.clear();		EHPadWorkList.clear();
}		}

/// When OutlineOpitonalBranches is on, this method collects BBs that
/// dominates all terminator blocks of the function \p F.
void MachineBlockPlacement::collectMustExecuteBBs() {
if (OutlineOptionalBranches) {
// Find the nearest common dominator of all of F's terminators.
MachineBasicBlock *Terminator = nullptr;
for (MachineBasicBlock &MBB : *F) {
if (MBB.succ_size() == 0) {
if (Terminator == nullptr)
Terminator = &MBB;
else
Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
}
}

// MBBs dominating this common dominator are unavoidable.
UnavoidableBlocks.clear();
for (MachineBasicBlock &MBB : *F) {
if (MDT->dominates(&MBB, Terminator)) {
UnavoidableBlocks.insert(&MBB);
}
}
}
}

void MachineBlockPlacement::buildCFGChains() {		void MachineBlockPlacement::buildCFGChains() {
// Ensure that every BB in the function has an associated chain to simplify		// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.		// the assumptions of the remaining algorithm.
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.		SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;		for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
++FI) {		++FI) {
MachineBasicBlock BB = &FI;		MachineBasicBlock BB = &FI;
BlockChain *Chain =		BlockChain *Chain =
Show All 15 Lines	for (;;) {
<< getBlockName(BB) << " -> " << getBlockName(NextBB)		<< getBlockName(BB) << " -> " << getBlockName(NextBB)
<< "\n");		<< "\n");
Chain->merge(NextBB, nullptr);		Chain->merge(NextBB, nullptr);
FI = NextFI;		FI = NextFI;
BB = NextBB;		BB = NextBB;
}		}
}		}

// Turned on with OutlineOptionalBranches option
collectMustExecuteBBs();

// Build any loop-based chains.		// Build any loop-based chains.
for (MachineLoop L : MLI)		for (MachineLoop L : MLI)
buildLoopChains(*L);		buildLoopChains(*L);

assert(BlockWorkList.empty());		assert(BlockWorkList.empty());
assert(EHPadWorkList.empty());		assert(EHPadWorkList.empty());

		// This must go after the loop chains, because the loop chains compute their
		// own loop-relative UnavoidableBlocks
		if (OutlineOptionalBranches \|\| TailDupPlacement) {
		computeUnavoidableBlocks();
		TailDupDelayBlocks.clear();
		}

SmallPtrSet<BlockChain *, 4> UpdatedPreds;		SmallPtrSet<BlockChain *, 4> UpdatedPreds;
for (MachineBasicBlock &MBB : *F)		for (MachineBasicBlock &MBB : *F)
fillWorkLists(&MBB, UpdatedPreds);		fillWorkLists(&MBB, UpdatedPreds);

BlockChain &FunctionChain = *BlockToChain[&F->front()];		BlockChain &FunctionChain = *BlockToChain[&F->front()];
buildChain(&F->front(), FunctionChain);		buildChain(&F->front(), FunctionChain);

#ifndef NDEBUG		#ifndef NDEBUG
▲ Show 20 Lines • Show All 322 Lines • ▼ Show 20 Lines	bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (MF.size() > 3 && EnableTailMerge) {		if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDuplicateMergeThresholdPlacement + 1;		unsigned TailMergeSize = TailDuplicateMergeThresholdPlacement + 1;
if (MF.getFunction()->optForSize())		if (MF.getFunction()->optForSize())
TailMergeSize = 3;		TailMergeSize = 3;
BranchFolder BF(/EnableTailMerge=/true, TailMergeSize,		BranchFolder BF(/EnableTailMerge=/true, TailMergeSize,
/CommonHoist=/false, *MBFI,		/CommonHoist=/false, *MBFI,
*MBPI);		*MBPI);

DEBUG(MF.dump());
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),		if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>(), MLI,		getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
/AfterBlockPlacement=/true)) {		/AfterBlockPlacement=/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.		// Redo the layout if tail merging creates/removes/moves blocks.
DEBUG(MF.dump());
BlockToChain.clear();		BlockToChain.clear();
// Must redo the dominator tree if blocks were changed.		// Must redo the dominator tree if blocks were changed.
MDT->runOnMachineFunction(MF);		MDT->runOnMachineFunction(MF);
		BlockToChain.clear();
ChainAllocator.DestroyAll();		ChainAllocator.DestroyAll();
buildCFGChains();		buildCFGChains();
}		}
}		}

optimizeBranches();		optimizeBranches();
alignBlocks();		alignBlocks();

▲ Show 20 Lines • Show All 90 Lines • Show Last 20 Lines

test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll

	Show First 20 Lines • Show All 658 Lines • ▼ Show 20 Lines
	bb1:			bb1:
	ret void			ret void
	}			}

	; CHECK-LABEL: realign_conditional			; CHECK-LABEL: realign_conditional
	; No realignment in the prologue.			; No realignment in the prologue.
	; CHECK-NOT: and			; CHECK-NOT: and
	; CHECK-NOT: 0xffffffffffffffe0			; CHECK-NOT: 0xffffffffffffffe0
	; CHECK: tbz {{.}} .[[LABEL:.]]			; CHECK: tbnz {{.}} .[[LABEL:.]]
				; CHECK: ret
				; CHECK: .[[LABEL]]:
	; Stack is realigned in a non-entry BB.			; Stack is realigned in a non-entry BB.
	; CHECK: sub [[REG:x[01-9]+]], sp, #64			; CHECK: sub [[REG:x[01-9]+]], sp, #64
	; CHECK: and sp, [[REG]], #0xffffffffffffffe0			; CHECK: and sp, [[REG]], #0xffffffffffffffe0
	; CHECK: .[[LABEL]]:
	; CHECK: ret


	define void @realign_conditional2(i1 %b) {			define void @realign_conditional2(i1 %b) {
	entry:			entry:
	%tmp = alloca i8, i32 16			%tmp = alloca i8, i32 16
	br i1 %b, label %bb0, label %bb1			br i1 %b, label %bb0, label %bb1

	bb0:			bb0:
	%MyAlloca = alloca i8, i64 64, align 32			%MyAlloca = alloca i8, i64 64, align 32
	br label %bb1			br label %bb1

	bb1:			bb1:
	ret void			ret void
	}			}

	; CHECK-LABEL: realign_conditional2			; CHECK-LABEL: realign_conditional2
	; Extra realignment in the prologue (performance issue).			; Extra realignment in the prologue (performance issue).
	; CHECK: tbz {{.}} .[[LABEL:.]]			; CHECK: tbnz {{.}} .[[LABEL:.]]
				; CHECK: ret
				; CHECK: .[[LABEL]]:
	; CHECK: sub x9, sp, #32 // =32			; CHECK: sub x9, sp, #32 // =32
	; CHECK: and sp, x9, #0xffffffffffffffe0			; CHECK: and sp, x9, #0xffffffffffffffe0
	; CHECK: mov x19, sp			; CHECK: mov x19, sp
	; Stack is realigned in a non-entry BB.			; Stack is realigned in a non-entry BB.
	; CHECK: sub [[REG:x[01-9]+]], sp, #64			; CHECK: sub [[REG:x[01-9]+]], sp, #64
	; CHECK: and sp, [[REG]], #0xffffffffffffffe0			; CHECK: and sp, [[REG]], #0xffffffffffffffe0
	; CHECK: .[[LABEL]]:
	; CHECK: ret

	attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }			attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
	attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }			attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

	!1 = !{!2, !2, i64 0}			!1 = !{!2, !2, i64 0}
	!2 = !{!"int", !3, i64 0}			!2 = !{!"int", !3, i64 0}
	!3 = !{!"omnipotent char", !4, i64 0}			!3 = !{!"omnipotent char", !4, i64 0}
	!4 = !{!"Simple C/C++ TBAA"}			!4 = !{!"Simple C/C++ TBAA"}

test/CodeGen/AArch64/arm64-atomic.ll

	; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone \| FileCheck %s			; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone \| FileCheck %s

	define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {			define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
	; CHECK-LABEL: val_compare_and_swap:			; CHECK-LABEL: val_compare_and_swap:
	; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0			; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
	; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:			; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
	; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]			; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
	; CHECK-NEXT: cmp [[RESULT]], w1			; CHECK-NEXT: cmp [[RESULT]], w1
	; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]			; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
	; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]			; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
	; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]			; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
	; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]			; CHECK-NEXT: ret
	; CHECK-NEXT: [[FAILBB]]:			; CHECK-NEXT: [[FAILBB]]:
	; CHECK-NEXT: clrex			; CHECK-NEXT: clrex
	; CHECK-NEXT: [[EXITBB]]:			; CHECK-NEXT: ret
	%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire			%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
	%val = extractvalue { i32, i1 } %pair, 0			%val = extractvalue { i32, i1 } %pair, 0
	ret i32 %val			ret i32 %val
	}			}

	define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {			define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
	; CHECK-LABEL: val_compare_and_swap_from_load:			; CHECK-LABEL: val_compare_and_swap_from_load:
	; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]			; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
	; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:			; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
	; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]			; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
	; CHECK-NEXT: cmp [[RESULT]], w1			; CHECK-NEXT: cmp [[RESULT]], w1
	; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]			; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
	; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]			; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
	; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]			; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
	; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]			; CHECK-NEXT: mov x0, x[[ADDR]]
				; CHECK-NEXT: ret
	; CHECK-NEXT: [[FAILBB]]:			; CHECK-NEXT: [[FAILBB]]:
	; CHECK-NEXT: clrex			; CHECK-NEXT: clrex
	; CHECK-NEXT: [[EXITBB]]:			; CHECK-NEXT: mov x0, x[[ADDR]]
				; CHECK-NEXT: ret
	%new = load i32, i32* %pnew			%new = load i32, i32* %pnew
	%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire			%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
	%val = extractvalue { i32, i1 } %pair, 0			%val = extractvalue { i32, i1 } %pair, 0
	ret i32 %val			ret i32 %val
	}			}

	define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {			define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
	; CHECK-LABEL: val_compare_and_swap_rel:			; CHECK-LABEL: val_compare_and_swap_rel:
	; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0			; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
	; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:			; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
	; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]			; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
	; CHECK-NEXT: cmp [[RESULT]], w1			; CHECK-NEXT: cmp [[RESULT]], w1
	; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]			; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
	; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]			; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
	; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]			; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
	; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]			; CHECK-NEXT: ret
	; CHECK-NEXT: [[FAILBB]]:			; CHECK-NEXT: [[FAILBB]]:
	; CHECK-NEXT: clrex			; CHECK-NEXT: clrex
	; CHECK-NEXT: [[EXITBB]]:			; CHECK-NEXT: ret
	%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic			%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
	%val = extractvalue { i32, i1 } %pair, 0			%val = extractvalue { i32, i1 } %pair, 0
	ret i32 %val			ret i32 %val
	}			}

	define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {			define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
	; CHECK-LABEL: val_compare_and_swap_64:			; CHECK-LABEL: val_compare_and_swap_64:
	; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0			; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
	; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:			; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
	; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]]			; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]]
	; CHECK-NEXT: cmp [[RESULT]], x1			; CHECK-NEXT: cmp [[RESULT]], x1
	; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]			; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
	; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]			; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
	; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]			; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
	; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]			; CHECK-NEXT: ret
	; CHECK-NEXT: [[FAILBB]]:			; CHECK-NEXT: [[FAILBB]]:
	; CHECK-NEXT: clrex			; CHECK-NEXT: clrex
	; CHECK-NEXT: [[EXITBB]]:			; CHECK-NEXT: ret
	%pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic			%pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
	%val = extractvalue { i64, i1 } %pair, 0			%val = extractvalue { i64, i1 } %pair, 0
	ret i64 %val			ret i64 %val
	}			}

	define i32 @fetch_and_nand(i32* %p) #0 {			define i32 @fetch_and_nand(i32* %p) #0 {
	; CHECK-LABEL: fetch_and_nand:			; CHECK-LABEL: fetch_and_nand:
	; CHECK: [[TRYBB:.?LBB[0-9_]+]]:			; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
	▲ Show 20 Lines • Show All 301 Lines • Show Last 20 Lines

test/CodeGen/AArch64/arm64-ccmp.ll

Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	if.end:
ret i32 7		ret i32 7
}		}

; Second block clobbers the flags, can't convert (easily).		; Second block clobbers the flags, can't convert (easily).
; CHECK: single_flagclobber		; CHECK: single_flagclobber
; CHECK: cmp		; CHECK: cmp
; CHECK: b.eq		; CHECK: b.eq
; CHECK: cmp		; CHECK: cmp
; CHECK: b.gt		; CHECK: b.le
define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {		define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
entry:		entry:
%cmp = icmp eq i32 %a, 5		%cmp = icmp eq i32 %a, 5
br i1 %cmp, label %if.then, label %lor.lhs.false		br i1 %cmp, label %if.then, label %lor.lhs.false

lor.lhs.false: ; preds = %entry		lor.lhs.false: ; preds = %entry
%cmp1 = icmp slt i32 %b, 7		%cmp1 = icmp slt i32 %b, 7
%mul = shl nsw i32 %b, 1		%mul = shl nsw i32 %b, 1
Show All 10 Lines	if.end: ; preds = %if.then, %lor.lhs.false
ret i32 7		ret i32 7
}		}

; Second block clobbers the flags and ends with a tbz terminator.		; Second block clobbers the flags and ends with a tbz terminator.
; CHECK: single_flagclobber_tbz		; CHECK: single_flagclobber_tbz
; CHECK: cmp		; CHECK: cmp
; CHECK: b.eq		; CHECK: b.eq
; CHECK: cmp		; CHECK: cmp
; CHECK: tbz		; CHECK: tbnz
define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {		define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
entry:		entry:
%cmp = icmp eq i32 %a, 5		%cmp = icmp eq i32 %a, 5
br i1 %cmp, label %if.then, label %lor.lhs.false		br i1 %cmp, label %if.then, label %lor.lhs.false

lor.lhs.false: ; preds = %entry		lor.lhs.false: ; preds = %entry
%cmp1 = icmp slt i32 %b, 7		%cmp1 = icmp slt i32 %b, 7
%mul = shl nsw i32 %b, 1		%mul = shl nsw i32 %b, 1
▲ Show 20 Lines • Show All 569 Lines • Show Last 20 Lines

test/CodeGen/AArch64/arm64-shrink-wrapping.ll

	; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE			; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
	; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE			; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
	target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
	target triple = "arm64-apple-ios"			target triple = "arm64-apple-ios"


	; Initial motivating example: Simple diamond with a call just on one side.			; Initial motivating example: Simple diamond with a call just on one side.
	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; No prologue needed.			; No prologue needed.
	; ENABLE: cmp w0, w1			; ENABLE: cmp w0, w1
	; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]			; ENABLE-NEXT: b.lt [[PROLOGUE_LABEL:LBB[0-9_]+]]
				; ENABLE: ret
	;			;
	; Prologue code.			; Prologue code.
				; ENABLE: [[PROLOGUE_LABEL]]:
	; CHECK: sub sp, sp, #32			; CHECK: sub sp, sp, #32
	; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16]			; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16]
	; CHECK-NEXT: add [[SAVE_SP]], sp, #16			; CHECK-NEXT: add [[SAVE_SP]], sp, #16
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; After the prologue is set.			; After the prologue is set.
	; DISABLE: cmp w0, w1			; DISABLE: cmp w0, w1
	; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]			; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
	;			;
	; Store %a in the alloca.			; Store %a in the alloca.
	; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4]			; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4]
	; Set the alloca address in the second argument.			; Set the alloca address in the second argument.
	; CHECK-NEXT: sub x1, [[SAVE_SP]], #4			; CHECK-NEXT: sub x1, [[SAVE_SP]], #4
	; Set the first argument to zero.			; Set the first argument to zero.
	; CHECK-NEXT: mov w0, wzr			; CHECK-NEXT: mov w0, wzr
	; CHECK-NEXT: bl _doSomething			; CHECK-NEXT: bl _doSomething
	;			;
	; Without shrink-wrapping, epilogue is in the exit block.			; Without shrink-wrapping, epilogue is in the exit block.
	; DISABLE: [[EXIT_LABEL]]:			; DISABLE: [[EXIT_LABEL]]:
	; Epilogue code.			; Epilogue code.
	; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp, #16]			; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp, #16]
	; CHECK-NEXT: add sp, sp, #32			; CHECK-NEXT: add sp, sp, #32
	;			;
	; With shrink-wrapping, exit block is a simple return.			; With shrink-wrapping, exit block is a simple return.
	; ENABLE: [[EXIT_LABEL]]:
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	define i32 @foo(i32 %a, i32 %b) {			define i32 @foo(i32 %a, i32 %b) {
	%tmp = alloca i32, align 4			%tmp = alloca i32, align 4
	%tmp2 = icmp slt i32 %a, %b			%tmp2 = icmp slt i32 %a, %b
	br i1 %tmp2, label %true, label %false			br i1 %tmp2, label %true, label %false

	true:			true:
	store i32 %a, i32* %tmp, align 4			store i32 %a, i32* %tmp, align 4
	▲ Show 20 Lines • Show All 672 Lines • Show Last 20 Lines

test/CodeGen/AArch64/branch-relax-bcc.ll

Show All 29 Lines	bb1:
store volatile i32 42, i32* undef		store volatile i32 42, i32* undef
ret i32 0		ret i32 0
}		}

declare i32 @foo() #0		declare i32 @foo() #0

; CHECK-LABEL: _block_split:		; CHECK-LABEL: _block_split:
; CHECK: cmp w0, #5		; CHECK: cmp w0, #5
; CHECK-NEXT: b.eq [[LONG_BR_BB:LBB[0-9]+_[0-9]+]]		; CHECK-NEXT: b.ne [[LOR_LHS_FALSE_BB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: b [[LOR_LHS_FALSE_BB:LBB[0-9]+_[0-9]+]]

; CHECK: [[LONG_BR_BB]]:
; CHECK-NEXT: b [[IF_THEN_BB:LBB[0-9]+_[0-9]+]]		; CHECK-NEXT: b [[IF_THEN_BB:LBB[0-9]+_[0-9]+]]

; CHECK: [[LOR_LHS_FALSE_BB]]:		; CHECK: [[LOR_LHS_FALSE_BB]]:
; CHECK: cmp w{{[0-9]+}}, #16		; CHECK: cmp w{{[0-9]+}}, #16
; CHECK-NEXT: b.le [[IF_THEN_BB]]		; CHECK-NEXT: b.le [[IF_THEN_BB]]
; CHECK-NEXT: b [[IF_END_BB:LBB[0-9]+_[0-9]+]]

; CHECK: [[IF_THEN_BB]]:		; CHECK: ; %if.end
		; CHECK: #0x7
		; CHECK: ret

		; CHECK: [[IF_THEN_BB]]
; CHECK: bl _foo		; CHECK: bl _foo
; CHECK-NOT: b L		; CHECK-NOT: b L

; CHECK: [[IF_END_BB]]:
; CHECK: #0x7		; CHECK: #0x7
; CHECK: ret		; CHECK: ret
define i32 @block_split(i32 %a, i32 %b) #0 {		define i32 @block_split(i32 %a, i32 %b) #0 {
entry:		entry:
%cmp = icmp eq i32 %a, 5		%cmp = icmp eq i32 %a, 5
br i1 %cmp, label %if.then, label %lor.lhs.false		br i1 %cmp, label %if.then, label %lor.lhs.false

lor.lhs.false: ; preds = %entry		lor.lhs.false: ; preds = %entry
Show All 16 Lines

test/CodeGen/AArch64/combine-comparisons-by-cse.ll

	Show First 20 Lines • Show All 258 Lines • ▼ Show 20 Lines
	}			}

	; undefined external to prevent possible optimizations			; undefined external to prevent possible optimizations
	declare void @do_something() #1			declare void @do_something() #1

	define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {			define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
	; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ			; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ
	; CHECK: cmn			; CHECK: cmn
	; CHECK: b.gt			; CHECK-NEXT: b.le
	; CHECK: cmp			; CHECK: cmp
	; CHECK: b.gt			; CHECK-NEXT: b.le
	entry:			entry:
	%0 = load i32, i32* @a, align 4			%0 = load i32, i32* @a, align 4
	%cmp4 = icmp slt i32 %0, -1			%cmp4 = icmp slt i32 %0, -1
	br i1 %cmp4, label %while.body.preheader, label %while.end			br i1 %cmp4, label %while.body.preheader, label %while.end

	while.body.preheader: ; preds = %entry			while.body.preheader: ; preds = %entry
	br label %while.body			br label %while.body

	▲ Show 20 Lines • Show All 197 Lines • Show Last 20 Lines

test/CodeGen/AArch64/fcmp.ll

Show All 25 Lines	; test. Obviously, other sequences are valid.
br i1 %tst3, label %t4, label %end		br i1 %tst3, label %t4, label %end
; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}		; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]		; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: b.vs .[[T4]]		; CHECK-NEXT: b.vs .[[T4]]
t4:		t4:
%tst4 = fcmp uge float %a, -0.0		%tst4 = fcmp uge float %a, -0.0
br i1 %tst4, label %t5, label %end		br i1 %tst4, label %t5, label %end
; CHECK-NOT: fcmp {{s[0-9]+}}, #0.0		; CHECK-NOT: fcmp {{s[0-9]+}}, #0.0
; CHECK: b.mi .LBB		; CHECK: b.pl .LBB

t5:		t5:
call void @bar(i32 0)		call void @bar(i32 0)
ret void		ret void
end:		end:
ret void		ret void

}		}
Show All 22 Lines	; test. Obviously, other sequences are valid.
br i1 %tst3, label %t4, label %end		br i1 %tst3, label %t4, label %end
; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}		; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]		; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: b.vs .[[T4]]		; CHECK-NEXT: b.vs .[[T4]]
t4:		t4:
%tst4 = fcmp uge double %a, -0.0		%tst4 = fcmp uge double %a, -0.0
br i1 %tst4, label %t5, label %end		br i1 %tst4, label %t5, label %end
; CHECK-NOT: fcmp {{d[0-9]+}}, #0.0		; CHECK-NOT: fcmp {{d[0-9]+}}, #0.0
; CHECK: b.mi .LBB		; CHECK: b.pl .LBB

t5:		t5:
call void @bar(i32 0)		call void @bar(i32 0)
ret void		ret void
end:		end:
ret void		ret void

}		}

test/CodeGen/AArch64/rm_redundant_cmp.ll

; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s		; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s

; The following cases are for i16		; The following cases are for i16

%struct.s_signed_i16 = type { i16, i16, i16 }		%struct.s_signed_i16 = type { i16, i16, i16 }
%struct.s_unsigned_i16 = type { i16, i16, i16 }		%struct.s_unsigned_i16 = type { i16, i16, i16 }

@cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer, align 2		@cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer, align 2
@cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer, align 2		@cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer, align 2

define void @test_i16_2cmp_signed_1() {		define void @test_i16_2cmp_signed_1() {
; CHECK-LABEL: test_i16_2cmp_signed_1		; CHECK-LABEL: test_i16_2cmp_signed_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}		; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.gt		; CHECK-NEXT: b.gt
; CHECK-NOT: cmp		; CHECK-NOT: cmp
; CHECK: b.ne		; CHECK: b.eq
entry:		entry:
%0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2		%0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
%1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2		%1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
%cmp = icmp sgt i16 %0, %1		%cmp = icmp sgt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else		br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry		if.then: ; preds = %entry
store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2		store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
Show All 39 Lines	if.end8: ; preds = %if.else, %if.then7, %if.then
ret void		ret void
}		}

define void @test_i16_2cmp_unsigned_1() {		define void @test_i16_2cmp_unsigned_1() {
; CHECK-LABEL: test_i16_2cmp_unsigned_1		; CHECK-LABEL: test_i16_2cmp_unsigned_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}		; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.hi		; CHECK-NEXT: b.hi
; CHECK-NOT: cmp		; CHECK-NOT: cmp
; CHECK: b.ne		; CHECK: b.eq
entry:		entry:
%0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2		%0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
%1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2		%1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
%cmp = icmp ugt i16 %0, %1		%cmp = icmp ugt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else		br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry		if.then: ; preds = %entry
store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2		store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines
@cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align 2		@cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align 2


define void @test_i8_2cmp_signed_1() {		define void @test_i8_2cmp_signed_1() {
; CHECK-LABEL: test_i8_2cmp_signed_1		; CHECK-LABEL: test_i8_2cmp_signed_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}		; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.gt		; CHECK-NEXT: b.gt
; CHECK-NOT: cmp		; CHECK-NOT: cmp
; CHECK: b.ne		; CHECK: b.eq
entry:		entry:
%0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2		%0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
%1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2		%1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
%cmp = icmp sgt i8 %0, %1		%cmp = icmp sgt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else		br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry		if.then: ; preds = %entry
store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2		store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
Show All 39 Lines	if.end8: ; preds = %if.else, %if.then7, %if.then
ret void		ret void
}		}

define void @test_i8_2cmp_unsigned_1() {		define void @test_i8_2cmp_unsigned_1() {
; CHECK-LABEL: test_i8_2cmp_unsigned_1		; CHECK-LABEL: test_i8_2cmp_unsigned_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}		; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.hi		; CHECK-NEXT: b.hi
; CHECK-NOT: cmp		; CHECK-NOT: cmp
; CHECK: b.ne		; CHECK: b.eq
entry:		entry:
%0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2		%0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
%1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2		%1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
%cmp = icmp ugt i8 %0, %1		%cmp = icmp ugt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else		br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry		if.then: ; preds = %entry
store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2		store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines

test/CodeGen/AArch64/tbz-tbnz.ll

	; RUN: llc < %s -O1 -mtriple=aarch64-eabi | FileCheck %s			; RUN: llc < %s -O1 -mtriple=aarch64-eabi | FileCheck %s

	declare void @t()			declare void @t()

	define void @test1(i32 %a) {			define void @test1(i32 %a) {
	; CHECK-LABEL: @test1			; CHECK-LABEL: @test1
	entry:			entry:
	%sub = add nsw i32 %a, -12			%sub = add nsw i32 %a, -12
	%cmp = icmp slt i32 %sub, 0			%cmp = icmp slt i32 %sub, 0
	br i1 %cmp, label %if.then, label %if.end			br i1 %cmp, label %if.then, label %if.end

	; CHECK: sub [[CMP:w[0-9]+]], w0, #12			; CHECK: sub [[CMP:w[0-9]+]], w0, #12
	; CHECK: tbz [[CMP]], #31			; CHECK: tbnz [[CMP]], #31

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test2(i64 %a) {			define void @test2(i64 %a) {
	; CHECK-LABEL: @test2			; CHECK-LABEL: @test2
	entry:			entry:
	%sub = add nsw i64 %a, -12			%sub = add nsw i64 %a, -12
	%cmp = icmp slt i64 %sub, 0			%cmp = icmp slt i64 %sub, 0
	br i1 %cmp, label %if.then, label %if.end			br i1 %cmp, label %if.then, label %if.end

	; CHECK: sub [[CMP:x[0-9]+]], x0, #12			; CHECK: sub [[CMP:x[0-9]+]], x0, #12
	; CHECK: tbz [[CMP]], #63			; CHECK: tbnz [[CMP]], #63

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test3(i32 %a) {			define void @test3(i32 %a) {
	; CHECK-LABEL: @test3			; CHECK-LABEL: @test3
	entry:			entry:
	%sub = add nsw i32 %a, -12			%sub = add nsw i32 %a, -12
	%cmp = icmp sgt i32 %sub, -1			%cmp = icmp sgt i32 %sub, -1
	br i1 %cmp, label %if.then, label %if.end			br i1 %cmp, label %if.then, label %if.end

	; CHECK: sub [[CMP:w[0-9]+]], w0, #12			; CHECK: sub [[CMP:w[0-9]+]], w0, #12
	; CHECK: tbnz [[CMP]], #31			; CHECK: tbz [[CMP]], #31

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test4(i64 %a) {			define void @test4(i64 %a) {
	; CHECK-LABEL: @test4			; CHECK-LABEL: @test4
	entry:			entry:
	%sub = add nsw i64 %a, -12			%sub = add nsw i64 %a, -12
	%cmp = icmp sgt i64 %sub, -1			%cmp = icmp sgt i64 %sub, -1
	br i1 %cmp, label %if.then, label %if.end			br i1 %cmp, label %if.then, label %if.end

	; CHECK: sub [[CMP:x[0-9]+]], x0, #12			; CHECK: sub [[CMP:x[0-9]+]], x0, #12
	; CHECK: tbnz [[CMP]], #63			; CHECK: tbz [[CMP]], #63

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test5(i32 %a) {			define void @test5(i32 %a) {
	; CHECK-LABEL: @test5			; CHECK-LABEL: @test5
	entry:			entry:
	%sub = add nsw i32 %a, -12			%sub = add nsw i32 %a, -12
	%cmp = icmp sge i32 %sub, 0			%cmp = icmp sge i32 %sub, 0
	br i1 %cmp, label %if.then, label %if.end			br i1 %cmp, label %if.then, label %if.end

	; CHECK: sub [[CMP:w[0-9]+]], w0, #12			; CHECK: sub [[CMP:w[0-9]+]], w0, #12
	; CHECK: tbnz [[CMP]], #31			; CHECK: tbz [[CMP]], #31

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test6(i64 %a) {			define void @test6(i64 %a) {
	; CHECK-LABEL: @test6			; CHECK-LABEL: @test6
	entry:			entry:
	%sub = add nsw i64 %a, -12			%sub = add nsw i64 %a, -12
	%cmp = icmp sge i64 %sub, 0			%cmp = icmp sge i64 %sub, 0
	br i1 %cmp, label %if.then, label %if.end			br i1 %cmp, label %if.then, label %if.end

	; CHECK: sub [[CMP:x[0-9]+]], x0, #12			; CHECK: sub [[CMP:x[0-9]+]], x0, #12
	; CHECK: tbnz [[CMP]], #63			; CHECK: tbz [[CMP]], #63

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test7(i32 %a) {			define void @test7(i32 %a) {
	; CHECK-LABEL: @test7			; CHECK-LABEL: @test7
	entry:			entry:
	%sub = sub nsw i32 %a, 12			%sub = sub nsw i32 %a, 12
	%cmp = icmp slt i32 %sub, 0			%cmp = icmp slt i32 %sub, 0
	br i1 %cmp, label %if.then, label %if.end			br i1 %cmp, label %if.then, label %if.end

	; CHECK: sub [[CMP:w[0-9]+]], w0, #12			; CHECK: sub [[CMP:w[0-9]+]], w0, #12
	; CHECK: tbz [[CMP]], #31			; CHECK: tbnz [[CMP]], #31

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}
	Show All 27 Lines

	if.then3:			if.then3:
	%shifted_op2 = shl i64 %val2, 62			%shifted_op2 = shl i64 %val2, 62
	%shifted_and2 = and i64 %val1, %shifted_op2			%shifted_and2 = and i64 %val1, %shifted_op2
	%tst4 = icmp sge i64 %shifted_and2, 0			%tst4 = icmp sge i64 %shifted_and2, 0
	br i1 %tst4, label %if.then4, label %if.end			br i1 %tst4, label %if.then4, label %if.end

	; CHECK: tst x0, x1, lsl #62			; CHECK: tst x0, x1, lsl #62
	; CHECK: b.lt			; CHECK: b.ge

	if.then4:			if.then4:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test9(i64 %val1) {			define void @test9(i64 %val1) {
	; CHECK-LABEL: @test9			; CHECK-LABEL: @test9
	%tst = icmp slt i64 %val1, 0			%tst = icmp slt i64 %val1, 0
	br i1 %tst, label %if.then, label %if.end			br i1 %tst, label %if.then, label %if.end

	; CHECK-NOT: cmp			; CHECK-NOT: cmp
	; CHECK: tbz x0, #63			; CHECK: tbnz x0, #63

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test10(i64 %val1) {			define void @test10(i64 %val1) {
	; CHECK-LABEL: @test10			; CHECK-LABEL: @test10
	%tst = icmp slt i64 %val1, 0			%tst = icmp slt i64 %val1, 0
	br i1 %tst, label %if.then, label %if.end			br i1 %tst, label %if.then, label %if.end

	; CHECK-NOT: cmp			; CHECK-NOT: cmp
	; CHECK: tbz x0, #63			; CHECK: tbnz x0, #63

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test11(i64 %val1, i64* %ptr) {			define void @test11(i64 %val1, i64* %ptr) {
	; CHECK-LABEL: @test11			; CHECK-LABEL: @test11

	; CHECK: ldr [[CMP:x[0-9]+]], [x1]			; CHECK: ldr [[CMP:x[0-9]+]], [x1]
	; CHECK-NOT: cmp			; CHECK-NOT: cmp
	; CHECK: tbz [[CMP]], #63			; CHECK: tbnz [[CMP]], #63

	%val = load i64, i64* %ptr			%val = load i64, i64* %ptr
	%tst = icmp slt i64 %val, 0			%tst = icmp slt i64 %val, 0
	br i1 %tst, label %if.then, label %if.end			br i1 %tst, label %if.then, label %if.end

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test12(i64 %val1) {			define void @test12(i64 %val1) {
	; CHECK-LABEL: @test12			; CHECK-LABEL: @test12
	%tst = icmp slt i64 %val1, 0			%tst = icmp slt i64 %val1, 0
	br i1 %tst, label %if.then, label %if.end			br i1 %tst, label %if.then, label %if.end

	; CHECK-NOT: cmp			; CHECK-NOT: cmp
	; CHECK: tbz x0, #63			; CHECK: tbnz x0, #63

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test13(i64 %val1, i64 %val2) {			define void @test13(i64 %val1, i64 %val2) {
	; CHECK-LABEL: @test13			; CHECK-LABEL: @test13
	%or = or i64 %val1, %val2			%or = or i64 %val1, %val2
	%tst = icmp slt i64 %or, 0			%tst = icmp slt i64 %or, 0
	br i1 %tst, label %if.then, label %if.end			br i1 %tst, label %if.then, label %if.end

	; CHECK: orr [[CMP:x[0-9]+]], x0, x1			; CHECK: orr [[CMP:x[0-9]+]], x0, x1
	; CHECK-NOT: cmp			; CHECK-NOT: cmp
	; CHECK: tbz [[CMP]], #63			; CHECK: tbnz [[CMP]], #63

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test14(i1 %cond) {			define void @test14(i1 %cond) {
	; CHECK-LABEL: @test14			; CHECK-LABEL: @test14
	br i1 %cond, label %if.end, label %if.then			br i1 %cond, label %if.end, label %if.then

	; CHECK-NOT: and			; CHECK-NOT: and
	; CHECK: tbnz w0, #0			; CHECK: tbz w0, #0

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test15(i1 %cond) {			define void @test15(i1 %cond) {
	; CHECK-LABEL: @test15			; CHECK-LABEL: @test15
	%cond1 = xor i1 %cond, -1			%cond1 = xor i1 %cond, -1
	br i1 %cond1, label %if.then, label %if.end			br i1 %cond1, label %if.then, label %if.end

	; CHECK-NOT: movn			; CHECK-NOT: movn
	; CHECK: tbnz w0, #0			; CHECK: tbz w0, #0

	if.then:			if.then:
	call void @t()			call void @t()
	br label %if.end			br label %if.end

	if.end:			if.end:
	ret void			ret void
	}			}

	define void @test16(i64 %in) {			define void @test16(i64 %in) {
	; CHECK-LABEL: @test16			; CHECK-LABEL: @test16
	%shl = shl i64 %in, 3			%shl = shl i64 %in, 3
	%and = and i64 %shl, 32			%and = and i64 %shl, 32
	%cond = icmp eq i64 %and, 0			%cond = icmp eq i64 %and, 0
	br i1 %cond, label %then, label %end			br i1 %cond, label %then, label %end

	; CHECK-NOT: lsl			; CHECK-NOT: lsl
	; CHECK: tbnz w0, #2			; CHECK: tbz w0, #2

	then:			then:
	call void @t()			call void @t()
	br label %end			br label %end

	end:			end:
	ret void			ret void
	}			}

	define void @test17(i64 %in) {			define void @test17(i64 %in) {
	; CHECK-LABEL: @test17			; CHECK-LABEL: @test17
	%shr = ashr i64 %in, 3			%shr = ashr i64 %in, 3
	%and = and i64 %shr, 1			%and = and i64 %shr, 1
	%cond = icmp eq i64 %and, 0			%cond = icmp eq i64 %and, 0
	br i1 %cond, label %then, label %end			br i1 %cond, label %then, label %end

	; CHECK-NOT: lsr			; CHECK-NOT: lsr
	; CHECK: tbnz w0, #3			; CHECK: tbz w0, #3

	then:			then:
	call void @t()			call void @t()
	br label %end			br label %end

	end:			end:
	ret void			ret void
	}			}

	define void @test18(i32 %in) {			define void @test18(i32 %in) {
	; CHECK-LABEL: @test18			; CHECK-LABEL: @test18
	%shr = ashr i32 %in, 2			%shr = ashr i32 %in, 2
	%cond = icmp sge i32 %shr, 0			%cond = icmp sge i32 %shr, 0
	br i1 %cond, label %then, label %end			br i1 %cond, label %then, label %end

	; CHECK-NOT: asr			; CHECK-NOT: asr
	; CHECK: tbnz w0, #31			; CHECK: tbz w0, #31

	then:			then:
	call void @t()			call void @t()
	br label %end			br label %end

	end:			end:
	ret void			ret void
	}			}

	define void @test19(i64 %in) {			define void @test19(i64 %in) {
	; CHECK-LABEL: @test19			; CHECK-LABEL: @test19
	%shl = lshr i64 %in, 3			%shl = lshr i64 %in, 3
	%trunc = trunc i64 %shl to i32			%trunc = trunc i64 %shl to i32
	%and = and i32 %trunc, 1			%and = and i32 %trunc, 1
	%cond = icmp eq i32 %and, 0			%cond = icmp eq i32 %and, 0
	br i1 %cond, label %then, label %end			br i1 %cond, label %then, label %end

	; CHECK-NOT: ubfx			; CHECK-NOT: ubfx
	; CHECK: tbnz w0, #3			; CHECK: tbz w0, #3

	then:			then:
	call void @t()			call void @t()
	br label %end			br label %end

	end:			end:
	ret void			ret void
	}			}

test/CodeGen/AMDGPU/cf-loop-on-constant.ll

	; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s			; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
	; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s			; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s

	; GCN-LABEL: {{^}}test_loop:			; GCN-LABEL: {{^}}test_loop:
	; GCN: [[LABEL:BB[0-9+]_[0-9]+]]:			; GCN: s_endpgm
				; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}}
	; GCN: ds_read_b32			; GCN: ds_read_b32
	; GCN: ds_write_b32			; GCN: ds_write_b32
	; GCN: s_branch [[LABEL]]			; GCN: s_branch [[LABEL]]
	; GCN: s_endpgm
	define void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind {			define void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind {
	entry:			entry:
	%cmp = icmp eq i32 %n, -1			%cmp = icmp eq i32 %n, -1
	br i1 %cmp, label %for.exit, label %for.body			br i1 %cmp, label %for.exit, label %for.body

	for.exit:			for.exit:
	ret void			ret void

	▲ Show 20 Lines • Show All 104 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/convergent-inlineasm.ll

	; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s			; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

	declare i32 @llvm.amdgcn.workitem.id.x() #0			declare i32 @llvm.amdgcn.workitem.id.x() #0
	; GCN-LABEL: {{^}}convergent_inlineasm:			; GCN-LABEL: {{^}}convergent_inlineasm:
	; GCN: BB#0:			; GCN: BB#0:
	; GCN: v_cmp_ne_i32_e64			; GCN: v_cmp_ne_i32_e64
	; GCN: ; mask branch			; GCN: ; mask branch
	; GCN: BB{{[0-9]+_[0-9]+}}:			; GCN: BB{{[0-9]+_[0-9]+}}:
				; GCN: BB{{[0-9]+_[0-9]+}}:
				; GCN: s_endpgm
	define void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {			define void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
	bb:			bb:
	%tmp = call i32 @llvm.amdgcn.workitem.id.x()			%tmp = call i32 @llvm.amdgcn.workitem.id.x()
	%tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1) #1			%tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1) #1
	%tmp2 = icmp eq i32 %tmp, 8			%tmp2 = icmp eq i32 %tmp, 8
	br i1 %tmp2, label %bb3, label %bb5			br i1 %tmp2, label %bb3, label %bb5

	bb3: ; preds = %bb			bb3: ; preds = %bb
	%tmp4 = getelementptr i64, i64 addrspace(1)* %arg, i32 %tmp			%tmp4 = getelementptr i64, i64 addrspace(1)* %arg, i32 %tmp
	store i64 %tmp1, i64 addrspace(1)* %arg, align 8			store i64 %tmp1, i64 addrspace(1)* %arg, align 8
	br label %bb5			br label %bb5

	bb5: ; preds = %bb3, %bb			bb5: ; preds = %bb3, %bb
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}nonconvergent_inlineasm:			; GCN-LABEL: {{^}}nonconvergent_inlineasm:
	; GCN: ; mask branch			; GCN: ; mask branch

	; GCN: BB{{[0-9]+_[0-9]+}}:			; GCN: BB{{[0-9]+_[0-9]+}}:
	; GCN: v_cmp_ne_i32_e64			; GCN: s_endpgm

	; GCN: BB{{[0-9]+_[0-9]+}}:			; GCN: BB{{[0-9]+_[0-9]+}}:
				; GCN: v_cmp_ne_i32_e64
				[Inline review comment by arsenm (unsubmitted, not done): Unnecessary whitespace change]
				; GCN: s_endpgm

	define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {			define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
	bb:			bb:
	%tmp = call i32 @llvm.amdgcn.workitem.id.x()			%tmp = call i32 @llvm.amdgcn.workitem.id.x()
	%tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1)			%tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1)
	%tmp2 = icmp eq i32 %tmp, 8			%tmp2 = icmp eq i32 %tmp, 8
	br i1 %tmp2, label %bb3, label %bb5			br i1 %tmp2, label %bb3, label %bb5

	bb3: ; preds = %bb			bb3: ; preds = %bb
	Show All 10 Lines

test/CodeGen/AMDGPU/salu-to-valu.ll

	Show First 20 Lines • Show All 431 Lines • ▼ Show 20 Lines
	}			}

	; Make sure we legalize vopc operands after moving an sopc to the value.			; Make sure we legalize vopc operands after moving an sopc to the value.

	; {{^}}sopc_vopc_legalize_bug:			; {{^}}sopc_vopc_legalize_bug:
	; GCN: s_load_dword [[SGPR:s[0-9]+]]			; GCN: s_load_dword [[SGPR:s[0-9]+]]
	; GCN: v_cmp_le_u32_e32 vcc, [[SGPR]], v{{[0-9]+}}			; GCN: v_cmp_le_u32_e32 vcc, [[SGPR]], v{{[0-9]+}}
	; GCN: s_and_b64 vcc, exec, vcc			; GCN: s_and_b64 vcc, exec, vcc
	; GCN: s_cbranch_vccnz [[EXIT:[A-Z0-9_]+]]			; GCN: s_cbranch_vccz [[SUCCESS:[A-Z0-9_]+]]
				; GCN: s_endpgm
				; GCN: {{^}}[[SUCCESS]]:
	; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1			; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
	; GCN-NOHSA: buffer_store_dword [[ONE]]			; GCN-NOHSA: buffer_store_dword [[ONE]]
	; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]]			; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]]
	; GCN; {{^}}[[EXIT]]:
	; GCN: s_endpgm			; GCN: s_endpgm
	define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) {			define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
	bb3: ; preds = %bb2			bb3: ; preds = %bb2
	%tmp0 = bitcast i32 %cond to float			%tmp0 = bitcast i32 %cond to float
	%tmp1 = fadd float %tmp0, 2.500000e-01			%tmp1 = fadd float %tmp0, 2.500000e-01
	%tmp2 = bitcast float %tmp1 to i32			%tmp2 = bitcast float %tmp1 to i32
	%tmp3 = icmp ult i32 %tmp2, %cond			%tmp3 = icmp ult i32 %tmp2, %cond
	br i1 %tmp3, label %bb6, label %bb7			br i1 %tmp3, label %bb6, label %bb7
	Show All 11 Lines

test/CodeGen/AMDGPU/si-annotate-cf.ll

	Show First 20 Lines • Show All 87 Lines • ▼ Show 20 Lines
	declare float @llvm.fabs.f32(float) nounwind readnone			declare float @llvm.fabs.f32(float) nounwind readnone

	; This broke the old AMDIL cfg structurizer			; This broke the old AMDIL cfg structurizer
	; FUNC-LABEL: {{^}}loop_land_info_assert:			; FUNC-LABEL: {{^}}loop_land_info_assert:
	; SI: s_cmp_gt_i32			; SI: s_cmp_gt_i32
	; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]			; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]

	; SI: s_cmp_gt_i32			; SI: s_cmp_gt_i32
	; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]			; SI-NEXT: s_cbranch_scc0 [[INFLOOP:BB[0-9]+_[0-9]+]]

	; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]
	; SI: s_branch [[INFLOOP]]

	; SI: [[ENDPGM]]:			; SI: [[ENDPGM]]:
	; SI: s_endpgm			; SI: s_endpgm

				; SI: [[INFLOOP]]
				; SI: s_branch [[INFLOOP]]
	define void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {			define void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
	entry:			entry:
	%cmp = icmp sgt i32 %c0, 0			%cmp = icmp sgt i32 %c0, 0
	br label %while.cond.outer			br label %while.cond.outer

	while.cond.outer:			while.cond.outer:
	%tmp = load float, float addrspace(1)* undef			%tmp = load float, float addrspace(1)* undef
	br label %while.cond			br label %while.cond
	Show All 40 Lines

test/CodeGen/AMDGPU/skip-if-dead.ll

	Show First 20 Lines • Show All 262 Lines • ▼ Show 20 Lines

	; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:			; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
	; CHECK: s_and_b64 vcc, exec,			; CHECK: s_and_b64 vcc, exec,
	; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]]			; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]]

	; CHECK: [[PHIBB]]:			; CHECK: [[PHIBB]]:
	; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]			; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
	; CHECK: s_and_b64 vcc, exec, vcc			; CHECK: s_and_b64 vcc, exec, vcc
	; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]			; CHECK: s_cbranch_vccnz [[BB3:BB[0-9]+_[0-9]+]]

	; CHECK: ; BB#3: ; %bb10			; CHECK: ; %end
				; CHECK-NEXT: s_endpgm

				; CHECK: [[BB3]]: ; %bb10
	; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9			; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
	; CHECK: buffer_store_dword			; CHECK: buffer_store_dword

	; CHECK: [[ENDBB]]:
	; CHECK-NEXT: s_endpgm			; CHECK-NEXT: s_endpgm

	define amdgpu_ps void @phi_use_def_before_kill() #0 {			define amdgpu_ps void @phi_use_def_before_kill() #0 {
	bb:			bb:
	%tmp = fadd float undef, 1.000000e+00			%tmp = fadd float undef, 1.000000e+00
	%tmp1 = fcmp olt float 0.000000e+00, %tmp			%tmp1 = fcmp olt float 0.000000e+00, %tmp
	%tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00			%tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
	call void @llvm.AMDGPU.kill(float %tmp2)			call void @llvm.AMDGPU.kill(float %tmp2)
	br i1 undef, label %phibb, label %bb8			br i1 undef, label %phibb, label %bb8

	▲ Show 20 Lines • Show All 107 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/smrd-vccz-bug.ll

	; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s			; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
	; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s			; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
	; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s			; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s

	; GCN-FUNC: {{^}}vccz_workaround:			; GCN-FUNC: {{^}}vccz_workaround:
	; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0			; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
	; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0			; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0
	; GCN: s_and_b64 vcc, exec, [[MASK]]			; GCN: s_and_b64 vcc, exec, [[MASK]]
	; GCN: s_waitcnt lgkmcnt(0)			; GCN: s_waitcnt lgkmcnt(0)
	; VCCZ-BUG: s_mov_b64 vcc, vcc			; VCCZ-BUG: s_mov_b64 vcc, vcc
	; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc			; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
	; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]			; GCN: s_cbranch_vccz [[SUCCESS:[0-9A-Za-z_]+]]
				; GCN: s_endpgm
				; GCN: [[SUCCESS]]:
	; GCN: buffer_store_dword			; GCN: buffer_store_dword
	; GCN: [[EXIT]]:
	; GCN: s_endpgm			; GCN: s_endpgm
	define void @vccz_workaround(i32 addrspace(2)* %in, i32 addrspace(1)* %out, float %cond) {			define void @vccz_workaround(i32 addrspace(2)* %in, i32 addrspace(1)* %out, float %cond) {
	entry:			entry:
	%cnd = fcmp oeq float 0.0, %cond			%cnd = fcmp oeq float 0.0, %cond
	%sgpr = load volatile i32, i32 addrspace(2)* %in			%sgpr = load volatile i32, i32 addrspace(2)* %in
	br i1 %cnd, label %if, label %endif			br i1 %cnd, label %if, label %endif

	if:			if:
	Show All 27 Lines

test/CodeGen/AMDGPU/uniform-cfg.ll

Show First 20 Lines • Show All 115 Lines • ▼ Show 20 Lines
}		}

; SI-LABEL: {{^}}uniform_if_move_valu:		; SI-LABEL: {{^}}uniform_if_move_valu:
; SI: v_add_f32_e32 [[CMP:v[0-9]+]]		; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to		; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.		; be selected for the SALU and then later moved to the VALU.
; SI: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]		; SI: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
; SI: s_and_b64 vcc, exec, [[COND]]		; SI: s_and_b64 vcc, exec, [[COND]]
; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]		; SI: s_cbranch_vccz [[SUCCESS_LABEL:[0-9_A-Za-z]+]]
		; SI: s_endpgm
		; SI: [[SUCCESS_LABEL]]:
; SI: buffer_store_dword		; SI: buffer_store_dword
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm		; SI: s_endpgm
define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {		define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
entry:		entry:
%a.0 = fadd float %a, 10.0		%a.0 = fadd float %a, 10.0
%cond = bitcast float %a.0 to i32		%cond = bitcast float %a.0 to i32
%cmp = icmp eq i32 %cond, 5		%cmp = icmp eq i32 %cond, 5
br i1 %cmp, label %if, label %endif		br i1 %cmp, label %if, label %endif

if:		if:
store i32 0, i32 addrspace(1)* %out		store i32 0, i32 addrspace(1)* %out
br label %endif		br label %endif

endif:		endif:
ret void		ret void
}		}

; SI-LABEL: {{^}}uniform_if_move_valu_commute:		; SI-LABEL: {{^}}uniform_if_move_valu_commute:
; SI: v_add_f32_e32 [[CMP:v[0-9]+]]		; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to		; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.		; be selected for the SALU and then later moved to the VALU.
; SI: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]		; SI: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
; SI: s_and_b64 vcc, exec, [[COND]]		; SI: s_and_b64 vcc, exec, [[COND]]
; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]		; SI: s_cbranch_vccz [[SUCCESS_LABEL:[0-9_A-Za-z]+]]
		; SI: s_endpgm
		; SI: [[SUCCESS_LABEL]]:
; SI: buffer_store_dword		; SI: buffer_store_dword
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm		; SI: s_endpgm
define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {		define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
entry:		entry:
%a.0 = fadd float %a, 10.0		%a.0 = fadd float %a, 10.0
%cond = bitcast float %a.0 to i32		%cond = bitcast float %a.0 to i32
%cmp = icmp ugt i32 %cond, 5		%cmp = icmp ugt i32 %cond, 5
br i1 %cmp, label %if, label %endif		br i1 %cmp, label %if, label %endif

▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines

if.end: ; preds = %if.else, %if.then		if.end: ; preds = %if.else, %if.then
store i32 3, i32 addrspace(1)* %out1		store i32 3, i32 addrspace(1)* %out1
ret void		ret void
}		}

; SI-LABEL: {{^}}icmp_2_users:		; SI-LABEL: {{^}}icmp_2_users:
; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1		; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1
; SI: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]		; SI: s_cbranch_scc0 [[SUCCESS:[a-zA-Z0-9_]+]]
		; SI: s_endpgm
		; SI: [[SUCCESS]]:
; SI: buffer_store_dword		; SI: buffer_store_dword
; SI: [[LABEL]]:
; SI: s_endpgm		; SI: s_endpgm
define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {		define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
main_body:		main_body:
%0 = icmp sgt i32 %cond, 0		%0 = icmp sgt i32 %cond, 0
%1 = sext i1 %0 to i32		%1 = sext i1 %0 to i32
br i1 %0, label %IF, label %ENDIF		br i1 %0, label %IF, label %ENDIF

IF:		IF:
store i32 %1, i32 addrspace(1)* %out		store i32 %1, i32 addrspace(1)* %out
br label %ENDIF		br label %ENDIF

ENDIF: ; preds = %IF, %main_body		ENDIF: ; preds = %IF, %main_body
ret void		ret void
}		}

; SI-LABEL: {{^}}icmp_users_different_blocks:		; SI-LABEL: {{^}}icmp_users_different_blocks:
; SI: s_load_dword [[COND:s[0-9]+]]		; SI: s_load_dword [[COND:s[0-9]+]]
; SI: s_cmp_lt_i32 [[COND]], 1		; SI: s_cmp_lt_i32 [[COND]], 1
; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]		; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
; SI: v_cmp_lt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0, [[COND]]		; SI: v_cmp_lt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0, [[COND]]
; SI: s_and_b64 vcc, exec, [[MASK]]		; SI: s_and_b64 vcc, exec, [[MASK]]
; SI: s_cbranch_vccnz [[EXIT]]		; SI: s_cbranch_vccz [[SUCCESS:[a-zA-Z0-9_]+]]
		; SI: s_endpgm
		; SI: {{^}}[[SUCCESS]]:
; SI: buffer_store		; SI: buffer_store
; SI: {{^}}[[EXIT]]:
; SI: s_endpgm		; SI: s_endpgm
define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {		define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:		bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0		%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%cmp0 = icmp sgt i32 %cond0, 0		%cmp0 = icmp sgt i32 %cond0, 0
%cmp1 = icmp sgt i32 %cond1, 0		%cmp1 = icmp sgt i32 %cond1, 0
br i1 %cmp0, label %bb2, label %bb9		br i1 %cmp0, label %bb2, label %bb9

▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	if_uniform:
br label %endif		br label %endif

endif:		endif:
ret void		ret void
}		}

; SI-LABEL: {{^}}divergent_inside_uniform:		; SI-LABEL: {{^}}divergent_inside_uniform:
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0		; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]		; SI: s_cbranch_scc0 [[SUCCESS_LABEL:[0-9_A-Za-z]+]]
		; SI: s_endpgm
		; SI: [[SUCCESS_LABEL]]:
; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}		; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc		; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]		; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1		; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]		; SI: buffer_store_dword [[ONE]]
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm		; SI: s_endpgm
define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {		define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:		entry:
%u_cmp = icmp eq i32 %cond, 0		%u_cmp = icmp eq i32 %cond, 0
br i1 %u_cmp, label %if, label %endif		br i1 %u_cmp, label %if, label %endif

if:		if:
store i32 0, i32 addrspace(1)* %out		store i32 0, i32 addrspace(1)* %out
Show All 12 Lines
; SI-LABEL: {{^}}divergent_if_uniform_if:		; SI-LABEL: {{^}}divergent_if_uniform_if:
; SI: v_cmp_eq_i32_e32 vcc, 0, v0		; SI: v_cmp_eq_i32_e32 vcc, 0, v0
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc		; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]		; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1		; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]		; SI: buffer_store_dword [[ONE]]
; SI: s_or_b64 exec, exec, [[MASK]]		; SI: s_or_b64 exec, exec, [[MASK]]
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0		; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]		; SI: s_cbranch_scc0 [[THREE:[A-Z0-9_]+]]
		; SI: s_endpgm
		; SI: [[THREE]]:
; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2		; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; SI: buffer_store_dword [[TWO]]		; SI: buffer_store_dword [[TWO]]
; SI: [[EXIT]]:
; SI: s_endpgm		; SI: s_endpgm
define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {		define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:		entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0		%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%d_cmp = icmp eq i32 %tid, 0		%d_cmp = icmp eq i32 %tid, 0
br i1 %d_cmp, label %if, label %endif		br i1 %d_cmp, label %if, label %endif

if:		if:
▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/uniform-crash.ll

	; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s			; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
	; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s			; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s

	; GCN-LABEL: {{^}}icmp_2_users:			; GCN-LABEL: {{^}}icmp_2_users:
	; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1			; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1
	; GCN: s_cbranch_scc1 [[LABEL:BB[0-9_A-Z]+]]			; GCN: s_cbranch_scc0 [[LABEL:BB[0-9_A-Z]+]]
				; GCN: s_endpgm
	; GCN: [[LABEL]]:			; GCN: [[LABEL]]:
	; GCN-NEXT: s_endpgm			; GCN: s_endpgm
	define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {			define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
	main_body:			main_body:
	%0 = icmp sgt i32 %cond, 0			%0 = icmp sgt i32 %cond, 0
	%1 = sext i1 %0 to i32			%1 = sext i1 %0 to i32
	br i1 %0, label %IF, label %ENDIF			br i1 %0, label %IF, label %ENDIF

	IF:			IF:
	store i32 %1, i32 addrspace(1)* %out			store i32 %1, i32 addrspace(1)* %out
	▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/valu-i1.ll

	Show First 20 Lines • Show All 110 Lines • ▼ Show 20 Lines
	; Load loop limit from buffer			; Load loop limit from buffer
	; Branch to exit if uniformly not taken			; Branch to exit if uniformly not taken
	; SI: ; BB#0:			; SI: ; BB#0:
	; SI: buffer_load_dword [[VBOUND:v[0-9]+]]			; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
	; SI: v_cmp_lt_i32_e32 vcc			; SI: v_cmp_lt_i32_e32 vcc
	; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc			; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
	; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]			; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
	; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]			; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
				; SI: s_branch [[LABEL_PREHEADER:BB[0-9]+_[0-9]+]]

				; SI: [[LABEL_EXIT]]:
				; SI: s_endpgm

	; Initialize inner condition to false			; Initialize inner condition to false
	; SI: BB{{[0-9]+_[0-9]+}}: ; %bb10.preheader			; SI: [[LABEL_PREHEADER]]:
	; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}			; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
	; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]			; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]

	; Clear exec bits for workitems that load -1s			; Clear exec bits for workitems that load -1s
	; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:			; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
	; SI: buffer_load_dword [[B:v[0-9]+]]			; SI: buffer_load_dword [[B:v[0-9]+]]
	; SI: buffer_load_dword [[A:v[0-9]+]]			; SI: buffer_load_dword [[A:v[0-9]+]]
	; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]			; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
	Show All 11 Lines
	; SI: [[LABEL_FLOW]]:			; SI: [[LABEL_FLOW]]:
	; SI: s_or_b64 exec, exec, [[ORNEG2]]			; SI: s_or_b64 exec, exec, [[ORNEG2]]
	; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]			; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
	; SI: s_andn2_b64 exec, exec, [[COND_STATE]]			; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
	; SI: s_cbranch_execnz [[LABEL_LOOP]]			; SI: s_cbranch_execnz [[LABEL_LOOP]]

	; SI: BB#5			; SI: BB#5
	; SI: s_or_b64 exec, exec, [[COND_STATE]]			; SI: s_or_b64 exec, exec, [[COND_STATE]]

	; SI: [[LABEL_EXIT]]:
	; SI-NOT: [[COND_STATE]]			; SI-NOT: [[COND_STATE]]
	; SI: s_endpgm			; SI: s_endpgm

	define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {			define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
	bb:			bb:
	%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0			%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
	%tmp4 = sext i32 %tmp to i64			%tmp4 = sext i32 %tmp to i64
	%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4			%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
	Show All 31 Lines

test/CodeGen/ARM/2013-05-05-IfConvertBug.ll

	Show First 20 Lines • Show All 106 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: wrapDistance:			; CHECK-LABEL: wrapDistance:
	; CHECK: cmp r1, #59			; CHECK: cmp r1, #59
	; CHECK-NEXT: itt le			; CHECK-NEXT: itt le
	; CHECK-NEXT: suble r0, r2, #1			; CHECK-NEXT: suble r0, r2, #1
	; CHECK-NEXT: bxle lr			; CHECK-NEXT: bxle lr
	; CHECK-NEXT: subs [[REG:r[0-9]+]], #120			; CHECK-NEXT: subs [[REG:r[0-9]+]], #120
	; CHECK-NEXT: cmp [[REG]], r1			; CHECK-NEXT: cmp [[REG]], r1
	; CHECK-NOT: it lt			; CHECK-NOT: it lt
	; CHECK-NEXT: bge [[LABEL:.+]]			; CHECK-NEXT: blt [[LABEL:.+]]
	; Next BB			; Next BB
				; CHECK: subs r0, r1, r0
				; CHECK-NEXT: bx lr
				; Next BB
				; CHECK: [[LABEL]]:
	; CHECK-NOT: cmplt			; CHECK-NOT: cmplt
	; CHECK: cmp r0, #119			; CHECK: cmp r0, #119
	; CHECK-NEXT: itt le			; CHECK-NEXT: itt le
	; CHECK-NEXT: addle r0, r1, #1			; CHECK-NEXT: addle r0, r1, #1
	; CHECK-NEXT: bxle lr			; CHECK-NEXT: bxle lr
	; Next BB
	; CHECK: [[LABEL]]:
	; CHECK-NEXT: subs r0, r1, r0			; CHECK-NEXT: subs r0, r1, r0
	; CHECK-NEXT: bx lr			; CHECK-NEXT: bx lr

	; CHECK-V8-LABEL: wrapDistance:			; CHECK-V8-LABEL: wrapDistance:
	; CHECK-V8: cmp r1, #59			; CHECK-V8: cmp r1, #59
	; CHECK-V8-NEXT: bgt			; CHECK-V8-NEXT: bgt
	; CHECK-V8-NEXT: %if.then			; CHECK-V8-NEXT: %if.then
	; CHECK-V8-NEXT: subs r0, r2, #1			; CHECK-V8-NEXT: subs r0, r2, #1
	▲ Show 20 Lines • Show All 43 Lines • Show Last 20 Lines

test/CodeGen/ARM/arm-shrink-wrapping.ll

	Show All 17 Lines
	; the diffs.			; the diffs.

	; Initial motivating example: Simple diamond with a call just on one side.			; Initial motivating example: Simple diamond with a call just on one side.
	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; No prologue needed.			; No prologue needed.
	; ENABLE: cmp r0, r1			; ENABLE: cmp r0, r1
	; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]			; ENABLE-NEXT: blt [[SUCCESS_LABEL:LBB[0-9_]+]]
				; ENABLE: bx lr
	;			;
	; Prologue code.			; Prologue code.
				; ENABLE: [[SUCCESS_LABEL]]:
	; CHECK: push {r7, lr}			; CHECK: push {r7, lr}
	; CHECK-NEXT: mov r7, sp			; CHECK-NEXT: mov r7, sp
	;;			;;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; After the prologue is set.			; After the prologue is set.
	; DISABLE: sub sp			; DISABLE: sub sp
	; DISABLE: cmp r0, r1			; DISABLE: cmp r0, r1
	; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]			; DISABLE-NEXT: blt [[SUCCESS_LABEL:LBB[0-9_]+]]
				; ARM-DISABLE: mov sp, r7
				; THUMB-DISABLE: add sp,
				; DISABLE-NEXT: pop {r7, pc}
	;			;
				; DISABLE: [[SUCCESS_LABEL]]:
	; Store %a in the alloca.			; Store %a in the alloca.
	; ARM-ENABLE: push {r0}			; ARM-ENABLE: push {r0}
	; THUMB-ENABLE: str r0, [sp, #-4]			; THUMB-ENABLE: str r0, [sp, #-4]
	; DISABLE: str r0, [sp]			; DISABLE: str r0, [sp]
	; Set the alloca address in the second argument.			; Set the alloca address in the second argument.
	; CHECK-NEXT: mov r1, sp			; CHECK-NEXT: mov r1, sp
	; Set the first argument to zero.			; Set the first argument to zero.
	; CHECK-NEXT: mov{{s?}} r0, #0			; CHECK-NEXT: mov{{s?}} r0, #0
	; CHECK-NEXT: bl{{x?}} _doSomething			; CHECK-NEXT: bl{{x?}} _doSomething
	;			;
	; With shrink-wrapping, epilogue is just after the call.			; With shrink-wrapping, epilogue is just after the call.
	; ARM-ENABLE-NEXT: mov sp, r7			; ARM-ENABLE-NEXT: mov sp, r7
	; THUMB-ENABLE-NEXT: add sp, #4			; THUMB-ENABLE-NEXT: add sp, #4
	; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr}			; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr}
	;			;
	; CHECK: [[EXIT_LABEL]]:			; Late stage tail-duplication removes the exit label with shrink-wrapping.
	;			; Without shrink-wrapping, epilogue is before the return.
	; Without shrink-wrapping, epilogue is in the exit block.
	; Epilogue code. (What we pop does not matter.)			; Epilogue code. (What we pop does not matter.)
	; ARM-DISABLE: mov sp, r7			; ARM-DISABLE: mov sp, r7
	; THUMB-DISABLE: add sp,			; THUMB-DISABLE: add sp,
	; DISABLE-NEXT: pop {r7, pc}			; DISABLE-NEXT: pop {r7, pc}
	;			;
	; ENABLE-NEXT: bx lr			; ENABLE-NEXT: bx lr
	define i32 @foo(i32 %a, i32 %b) {			define i32 @foo(i32 %a, i32 %b) {
	%tmp = alloca i32, align 4			%tmp = alloca i32, align 4
	▲ Show 20 Lines • Show All 319 Lines • ▼ Show 20 Lines
	; ARM-DISABLE: cmp r0, #0			; ARM-DISABLE: cmp r0, #0
	; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]			; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
	; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]			; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
	;			;
	; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10			; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10
	;			;
	; Next BB.			; Next BB.
	; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body			; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
	; ARM: subs [[IV]], [[IV]], #1			; ARM-DAG: subs [[IV]], [[IV]], #1
	; THUMB: subs [[IV]], #1			; THUMB-DAG: subs [[IV]], #1
	; CHECK: add{{(\.w)?}} r4, r4, #1			; CHECK-DAG: add{{(\.w)?}} r4, r4, #1
	; CHECK: bne [[LOOP]]			; CHECK: bne [[LOOP]]
	;			;
	; Next BB.			; Next BB.
	; CHECK: mov{{s?}} r0, #0			; CHECK: mov{{s?}} r0, #0
	;			;
	; Duplicated epilogue.			; Duplicated epilogue.
	; DISABLE: pop {r4, r7, pc}			; DISABLE: pop {r4, r7, pc}
	;			;
	▲ Show 20 Lines • Show All 286 Lines • Show Last 20 Lines

test/CodeGen/ARM/atomic-cmpxchg.ll

	Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines
	; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:			; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
	; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]			; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
	; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]			; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
	; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]			; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]
	; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]			; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
	; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1			; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1
	; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0			; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
	; CHECK-ARMV7-NEXT: bne [[TRY]]			; CHECK-ARMV7-NEXT: bne [[TRY]]
	; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]]			; CHECK-ARMV7-NEXT: mov r0, [[RES]]
				; CHECK-ARMV7-NEXT: bx lr
	; CHECK-ARMV7-NEXT: [[FAIL]]:			; CHECK-ARMV7-NEXT: [[FAIL]]:
	; CHECK-ARMV7-NEXT: clrex			; CHECK-ARMV7-NEXT: clrex
	; CHECK-ARMV7-NEXT: mov [[RES]], #0			; CHECK-ARMV7-NEXT: mov [[RES]], #0
	; CHECK-ARMV7-NEXT: [[END]]:
	; CHECK-ARMV7-NEXT: mov r0, [[RES]]			; CHECK-ARMV7-NEXT: mov r0, [[RES]]
	; CHECK-ARMV7-NEXT: bx lr			; CHECK-ARMV7-NEXT: bx lr

	; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:			; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
	; CHECK-THUMBV7-NEXT: .fnstart			; CHECK-THUMBV7-NEXT: .fnstart
	; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1			; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
	; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]			; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]
	; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]:			; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]:
	Show All 12 Lines

test/CodeGen/ARM/atomic-op.ll

	Show First 20 Lines • Show All 291 Lines • ▼ Show 20 Lines
	; CHECK-NOT: dmb ish			; CHECK-NOT: dmb ish
	; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:			; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
	; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]			; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
	; CHECK: cmp [[OLDVAL]], r1			; CHECK: cmp [[OLDVAL]], r1
	; CHECK: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]			; CHECK: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
	; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]			; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
	; CHECK: cmp [[SUCCESS]], #0			; CHECK: cmp [[SUCCESS]], #0
	; CHECK: bne [[LOOP_BB]]			; CHECK: bne [[LOOP_BB]]
	; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]]			; CHECK: dmb ish
				; CHECK: bx lr
	; CHECK: [[FAIL_BB]]:			; CHECK: [[FAIL_BB]]:
	; CHECK-NEXT: clrex			; CHECK-NEXT: clrex
	; CHECK-NEXT: [[END_BB]]:
	; CHECK: dmb ish			; CHECK: dmb ish
	; CHECK: bx lr			; CHECK: bx lr

	ret i32 %oldval			ret i32 %oldval
	}			}

	define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {			define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
	; CHECK-LABEL: load_load_add_acquire			; CHECK-LABEL: load_load_add_acquire
	▲ Show 20 Lines • Show All 63 Lines • Show Last 20 Lines

test/CodeGen/ARM/atomic-ops-v8.ll

	Show First 20 Lines • Show All 1,039 Lines • ▼ Show 20 Lines
	; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0			; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0

	; CHECK: .LBB{{[0-9]+}}_1:			; CHECK: .LBB{{[0-9]+}}_1:
	; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]			; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
	; r0 below is a reasonable guess but could change: it certainly comes into the			; r0 below is a reasonable guess but could change: it certainly comes into the
	; function there.			; function there.
	; CHECK-ARM-NEXT: cmp r[[OLD]], r0			; CHECK-ARM-NEXT: cmp r[[OLD]], r0
	; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]			; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_3			; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
	; CHECK-NEXT: BB#2:			; CHECK-NEXT: BB#2:
	; As above, r1 is a reasonable guess.			; As above, r1 is a reasonable guess.
	; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]			; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
	; CHECK-NEXT: cmp [[STATUS]], #0			; CHECK-NEXT: cmp [[STATUS]], #0
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_1			; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
	; CHECK-NEXT: b .LBB{{[0-9]+}}_4			; CHECK-ARM: mov r0, r[[OLD]]
	; CHECK-NEXT: .LBB{{[0-9]+}}_3:			; CHECK: bx lr
	; CHECK-NEXT: clrex
	; CHECK-NEXT: .LBB{{[0-9]+}}_4:			; CHECK-NEXT: .LBB{{[0-9]+}}_4:
				; CHECK-NEXT: clrex
	; CHECK-NOT: dmb			; CHECK-NOT: dmb
	; CHECK-NOT: mcr			; CHECK-NOT: mcr

	; CHECK-ARM: mov r0, r[[OLD]]			; CHECK-ARM: mov r0, r[[OLD]]
				; CHECK-ARM-NEXT: bx lr
	ret i8 %old			ret i8 %old
	}			}

	define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind {			define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind {
	; CHECK-LABEL: test_atomic_cmpxchg_i16:			; CHECK-LABEL: test_atomic_cmpxchg_i16:
	%pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst			%pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
	%old = extractvalue { i16, i1 } %pair, 0			%old = extractvalue { i16, i1 } %pair, 0
	; CHECK-NOT: dmb			; CHECK-NOT: dmb
	; CHECK-NOT: mcr			; CHECK-NOT: mcr
	; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var16			; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var16
	; CHECK-DAG: movt r[[ADDR]], :upper16:var16			; CHECK-DAG: movt r[[ADDR]], :upper16:var16
	; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0			; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0

	; CHECK: .LBB{{[0-9]+}}_1:			; CHECK: .LBB{{[0-9]+}}_1:
	; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]			; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
	; r0 below is a reasonable guess but could change: it certainly comes into the			; r0 below is a reasonable guess but could change: it certainly comes into the
	; function there.			; function there.
	; CHECK-ARM-NEXT: cmp r[[OLD]], r0			; CHECK-ARM-NEXT: cmp r[[OLD]], r0
	; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]			; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_3			; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
	; CHECK-NEXT: BB#2:			; CHECK-NEXT: BB#2:
	; As above, r1 is a reasonable guess.			; As above, r1 is a reasonable guess.
	; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]			; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
	; CHECK-NEXT: cmp [[STATUS]], #0			; CHECK-NEXT: cmp [[STATUS]], #0
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_1			; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
	; CHECK-NEXT: b .LBB{{[0-9]+}}_4			; CHECK-ARM: mov r0, r[[OLD]]
	; CHECK-NEXT: .LBB{{[0-9]+}}_3:			; CHECK: bx lr
	; CHECK-NEXT: clrex
	; CHECK-NEXT: .LBB{{[0-9]+}}_4:			; CHECK-NEXT: .LBB{{[0-9]+}}_4:
				; CHECK-NEXT: clrex
	; CHECK-NOT: dmb			; CHECK-NOT: dmb
	; CHECK-NOT: mcr			; CHECK-NOT: mcr

	; CHECK-ARM: mov r0, r[[OLD]]			; CHECK-ARM: mov r0, r[[OLD]]
				; CHECK-ARM-NEXT: bx lr
	ret i16 %old			ret i16 %old
	}			}

	define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {			define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
	; CHECK-LABEL: test_atomic_cmpxchg_i32:			; CHECK-LABEL: test_atomic_cmpxchg_i32:
	%pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic			%pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
	%old = extractvalue { i32, i1 } %pair, 0			%old = extractvalue { i32, i1 } %pair, 0
	store i32 %old, i32* @var32			store i32 %old, i32* @var32
	; CHECK-NOT: dmb			; CHECK-NOT: dmb
	; CHECK-NOT: mcr			; CHECK-NOT: mcr
	; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32			; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
	; CHECK: movt r[[ADDR]], :upper16:var32			; CHECK: movt r[[ADDR]], :upper16:var32

	; CHECK: .LBB{{[0-9]+}}_1:			; CHECK: .LBB{{[0-9]+}}_1:
	; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]			; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
	; r0 below is a reasonable guess but could change: it certainly comes into the			; r0 below is a reasonable guess but could change: it certainly comes into the
	; function there.			; function there.
	; CHECK-NEXT: cmp r[[OLD]], r0			; CHECK-NEXT: cmp r[[OLD]], r0
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_3			; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
	; CHECK-NEXT: BB#2:			; CHECK-NEXT: BB#2:
	; As above, r1 is a reasonable guess.			; As above, r1 is a reasonable guess.
	; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]			; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
	; CHECK-NEXT: cmp [[STATUS]], #0			; CHECK-NEXT: cmp [[STATUS]], #0
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_1			; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
	; CHECK-NEXT: b .LBB{{[0-9]+}}_4			; CHECK: str{{(.w)?}} r[[OLD]],
	; CHECK-NEXT: .LBB{{[0-9]+}}_3:			; CHECK-NEXT: bx lr
	; CHECK-NEXT: clrex
	; CHECK-NEXT: .LBB{{[0-9]+}}_4:			; CHECK-NEXT: .LBB{{[0-9]+}}_4:
				; CHECK-NEXT: clrex
	; CHECK-NOT: dmb			; CHECK-NOT: dmb
	; CHECK-NOT: mcr			; CHECK-NOT: mcr

	; CHECK: str{{(.w)?}} r[[OLD]],			; CHECK: str{{(.w)?}} r[[OLD]],
				; CHECK-ARM-NEXT: bx lr
	ret void			ret void
	}			}

	define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {			define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
	; CHECK-LABEL: test_atomic_cmpxchg_i64:			; CHECK-LABEL: test_atomic_cmpxchg_i64:
	%pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic			%pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
	%old = extractvalue { i64, i1 } %pair, 0			%old = extractvalue { i64, i1 } %pair, 0
	; CHECK-NOT: dmb			; CHECK-NOT: dmb
	; CHECK-NOT: mcr			; CHECK-NOT: mcr
	; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64			; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
	; CHECK: movt r[[ADDR]], :upper16:var64			; CHECK: movt r[[ADDR]], :upper16:var64

	; CHECK: .LBB{{[0-9]+}}_1:			; CHECK: .LBB{{[0-9]+}}_1:
	; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]			; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
	; r0, r1 below is a reasonable guess but could change: it certainly comes into the			; r0, r1 below is a reasonable guess but could change: it certainly comes into the
	; function there.			; function there.
	; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0			; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
	; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1			; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
	; CHECK-ARM-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]			; CHECK-ARM-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
	; CHECK-THUMB-LE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_HI]], [[MISMATCH_LO]]			; CHECK-THUMB-LE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_HI]], [[MISMATCH_LO]]
	; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1			; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
	; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0			; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
	; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]			; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
	; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]			; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_3			; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
	; CHECK-NEXT: BB#2:			; CHECK-NEXT: BB#2:
	; As above, r2, r3 is a reasonable guess.			; As above, r2, r3 is a reasonable guess.
	; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]			; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
	; CHECK-NEXT: cmp [[STATUS]], #0			; CHECK-NEXT: cmp [[STATUS]], #0
	; CHECK-NEXT: bne .LBB{{[0-9]+}}_1			; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
	; CHECK-NEXT: b .LBB{{[0-9]+}}_4			; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
	; CHECK-NEXT: .LBB{{[0-9]+}}_3:			; CHECK-NEXT: pop
	; CHECK-NEXT: clrex
	; CHECK-NEXT: .LBB{{[0-9]+}}_4:			; CHECK-NEXT: .LBB{{[0-9]+}}_4:
				; CHECK-NEXT: clrex
	; CHECK-NOT: dmb			; CHECK-NOT: dmb
	; CHECK-NOT: mcr			; CHECK-NOT: mcr

	; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]			; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
	store i64 %old, i64* @var64			store i64 %old, i64* @var64
	ret void			ret void
	}			}

	▲ Show 20 Lines • Show All 255 Lines • Show Last 20 Lines

test/CodeGen/ARM/fold-stack-adjust.ll

	Show First 20 Lines • Show All 129 Lines • ▼ Show 20 Lines

	; PR18136: there was a bug determining where the first eligible pop in a			; PR18136: there was a bug determining where the first eligible pop in a
	; basic-block was when the entire block was epilogue code.			; basic-block was when the entire block was epilogue code.
	define void @test_fold_point(i1 %tst) minsize {			define void @test_fold_point(i1 %tst) minsize {
	; CHECK-LABEL: test_fold_point:			; CHECK-LABEL: test_fold_point:

	; Important to check for beginning of basic block, because if it gets			; Important to check for beginning of basic block, because if it gets
	; if-converted the test is probably no longer checking what it should.			; if-converted the test is probably no longer checking what it should.
	; CHECK: {{LBB[0-9]+_2}}:			; CHECK: %end
	; CHECK-NEXT: vpop {d7, d8}			; CHECK-NEXT: vpop {d7, d8}
	; CHECK-NEXT: pop {r4, pc}			; CHECK-NEXT: pop {r4, pc}

	; With a guaranteed frame-pointer, we want to make sure that its offset in the			; With a guaranteed frame-pointer, we want to make sure that its offset in the
	; push block is correct, even if a few registers have been tacked onto a later			; push block is correct, even if a few registers have been tacked onto a later
	; vpush (PR18160).			; vpush (PR18160).
	; CHECK-IOS-LABEL: test_fold_point:			; CHECK-IOS-LABEL: test_fold_point:
	; CHECK-IOS: push {r4, r7, lr}			; CHECK-IOS: push {r4, r7, lr}
	▲ Show 20 Lines • Show All 75 Lines • Show Last 20 Lines

test/CodeGen/ARM/machine-cse-cmp.ll

	Show First 20 Lines • Show All 46 Lines • ▼ Show 20 Lines
	declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind			declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind

	; rdar://12462006			; rdar://12462006
	define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind {			define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
	entry:			entry:
	; CHECK-LABEL: f3:			; CHECK-LABEL: f3:
	; CHECK-NOT: sub			; CHECK-NOT: sub
	; CHECK: cmp			; CHECK: cmp
	; CHECK: blt			; CHECK: bge
	%0 = load i32, i32* %offset, align 4			%0 = load i32, i32* %offset, align 4
	%cmp = icmp slt i32 %0, %size			%cmp = icmp slt i32 %0, %size
	%s = sub nsw i32 %0, %size			%s = sub nsw i32 %0, %size
	%size2 = sub nsw i32 %size, 0			%size2 = sub nsw i32 %size, 0
	br i1 %cmp, label %return, label %if.end			br i1 %cmp, label %return, label %if.end

	if.end:			if.end:
	; We are checking cse between %sub here and %s in entry block.			; We are checking cse between %sub here and %s in entry block.
	Show All 16 Lines

test/CodeGen/Mips/llvm-ir/ashr.ll

	Show First 20 Lines • Show All 77 Lines • ▼ Show 20 Lines
	}			}

	define signext i64 @ashr_i64(i64 signext %a, i64 signext %b) {			define signext i64 @ashr_i64(i64 signext %a, i64 signext %b) {
	entry:			entry:
	; ALL-LABEL: ashr_i64:			; ALL-LABEL: ashr_i64:

	; M2: srav $[[T0:[0-9]+]], $4, $7			; M2: srav $[[T0:[0-9]+]], $4, $7
	; M2: andi $[[T1:[0-9]+]], $7, 32			; M2: andi $[[T1:[0-9]+]], $7, 32
	; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]]			; M2: beqz $[[T1]], $[[BB0:BB[0-9_]+]]
	; M2: move $3, $[[T0]]			; M2: move $3, $[[T0]]
				; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
				; M2: nop
				; M2: $[[EXIT:BB[0-9_]+]]:
				; M2: jr $ra
				; M2: nop
				; M2: $[[BB0]]:
	; M2: srlv $[[T2:[0-9]+]], $5, $7			; M2: srlv $[[T2:[0-9]+]], $5, $7
	; M2: not $[[T3:[0-9]+]], $7			; M2: not $[[T3:[0-9]+]], $7
	; M2: sll $[[T4:[0-9]+]], $4, 1			; M2: sll $[[T4:[0-9]+]], $4, 1
	; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]			; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
				; M2: beqz $[[T1]], $[[EXIT]]
	; M2: or $3, $[[T3]], $[[T2]]			; M2: or $3, $[[T3]], $[[T2]]
	; M2: $[[BB0]]:
	; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
	; M2: nop
	; M2: sra $2, $4, 31
	; M2: $[[BB1]]:			; M2: $[[BB1]]:
	; M2: jr $ra			; M2: jr $ra
	; M2: nop			; M2: sra $2, $4, 31

	; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7			; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7
	; 32R1-R5: not $[[T1:[0-9]+]], $7			; 32R1-R5: not $[[T1:[0-9]+]], $7
	; 32R1-R5: sll $[[T2:[0-9]+]], $4, 1			; 32R1-R5: sll $[[T2:[0-9]+]], $4, 1
	; 32R1-R5: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]			; 32R1-R5: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
	; 32R1-R5: or $3, $[[T3]], $[[T0]]			; 32R1-R5: or $3, $[[T3]], $[[T0]]
	; 32R1-R5: srav $[[T4:[0-9]+]], $4, $7			; 32R1-R5: srav $[[T4:[0-9]+]], $4, $7
	; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32			; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32
	▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines
	entry:			entry:
	; ALL-LABEL: ashr_i128:			; ALL-LABEL: ashr_i128:

	; GP32: lw $25, %call16(__ashrti3)($gp)			; GP32: lw $25, %call16(__ashrti3)($gp)

	; M3: sll $[[T0:[0-9]+]], $7, 0			; M3: sll $[[T0:[0-9]+]], $7, 0
	; M3: dsrav $[[T1:[0-9]+]], $4, $7			; M3: dsrav $[[T1:[0-9]+]], $4, $7
	; M3: andi $[[T2:[0-9]+]], $[[T0]], 64			; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
	; M3: bnez $[[T3:[0-9]+]], [[BB0:.LBB[0-9_]+]]			; M3: beqz $[[T3:[0-9]+]], .[[BB0:LBB[0-9_]+]]
	; M3: move $3, $[[T1]]			; M3: move $3, $[[T1]]
				; M3: bnez $[[T3]], .[[BB1:LBB[0-9_]+]]
				; M3: nop
				; M3: .[[EXIT:LBB[0-9_]+]]:
				; M3: jr $ra
				; M3: nop
				; M3: .[[BB0]]:
	; M3: dsrlv $[[T4:[0-9]+]], $5, $7			; M3: dsrlv $[[T4:[0-9]+]], $5, $7
	; M3: dsll $[[T5:[0-9]+]], $4, 1			; M3: dsll $[[T5:[0-9]+]], $4, 1
	; M3: not $[[T6:[0-9]+]], $[[T0]]			; M3: not $[[T6:[0-9]+]], $[[T0]]
	; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]			; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
				; M3: beqz $[[T3]], .[[EXIT]]
	; M3: or $3, $[[T7]], $[[T4]]			; M3: or $3, $[[T7]], $[[T4]]
	; M3: [[BB0]]:			; M3: .[[BB1]]:
	; M3: beqz $[[T3]], [[BB1:.LBB[0-9_]+]]
	; M3: nop
	; M3: dsra $2, $4, 63
	; M3: [[BB1]]:
	; M3: jr $ra			; M3: jr $ra
	; M3: nop			; M3: dsra $2, $4, 63

	; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7			; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7
	; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1			; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1
	; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0			; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0
	; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]			; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]
	; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T1]], $[[T3]]			; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
	; GP64-NOT-R6: or $3, $[[T4]], $[[T0]]			; GP64-NOT-R6: or $3, $[[T4]], $[[T0]]
	; GP64-NOT-R6: dsrav $2, $4, $7			; GP64-NOT-R6: dsrav $2, $4, $7
	Show All 29 Lines

test/CodeGen/Mips/llvm-ir/lshr.ll

	Show First 20 Lines • Show All 75 Lines • ▼ Show 20 Lines
	}			}

	define signext i64 @lshr_i64(i64 signext %a, i64 signext %b) {			define signext i64 @lshr_i64(i64 signext %a, i64 signext %b) {
	entry:			entry:
	; ALL-LABEL: lshr_i64:			; ALL-LABEL: lshr_i64:

	; M2: srlv $[[T0:[0-9]+]], $4, $7			; M2: srlv $[[T0:[0-9]+]], $4, $7
	; M2: andi $[[T1:[0-9]+]], $7, 32			; M2: andi $[[T1:[0-9]+]], $7, 32
	; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]]			; M2: beqz $[[T1]], $[[BB0:BB[0-9_]+]]
	; M2: move $3, $[[T0]]			; M2: move $3, $[[T0]]
				; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
				; M2: addiu $2, $zero, 0
				; M2: $[[EXIT:BB[0-9_]+]]:
				; M2: jr $ra
				; M2: nop
				; M2: $[[BB0]]:
	; M2: srlv $[[T2:[0-9]+]], $5, $7			; M2: srlv $[[T2:[0-9]+]], $5, $7
	; M2: not $[[T3:[0-9]+]], $7			; M2: not $[[T3:[0-9]+]], $7
	; M2: sll $[[T4:[0-9]+]], $4, 1			; M2: sll $[[T4:[0-9]+]], $4, 1
	; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]			; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
	; M2: or $3, $[[T3]], $[[T2]]			; M2: or $3, $[[T3]], $[[T2]]
	; M2: $[[BB0]]:			; M2: bnez $[[T1]], $[[EXIT]]
	; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
	; M2: addiu $2, $zero, 0			; M2: addiu $2, $zero, 0
	; M2: move $2, $[[T0]]
	; M2: $[[BB1]]:			; M2: $[[BB1]]:
	; M2: jr $ra			; M2: jr $ra
	; M2: nop			; M2: move $2, $[[T0]]

	; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7			; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7
	; 32R1-R5: not $[[T1:[0-9]+]], $7			; 32R1-R5: not $[[T1:[0-9]+]], $7
	; 32R1-R5: sll $[[T2:[0-9]+]], $4, 1			; 32R1-R5: sll $[[T2:[0-9]+]], $4, 1
	; 32R1-R5: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]			; 32R1-R5: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
	; 32R1-R5: or $3, $[[T3]], $[[T0]]			; 32R1-R5: or $3, $[[T3]], $[[T0]]
	; 32R1-R5: srlv $[[T4:[0-9]+]], $4, $7			; 32R1-R5: srlv $[[T4:[0-9]+]], $4, $7
	; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32			; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32
	▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines
	entry:			entry:
	; ALL-LABEL: lshr_i128:			; ALL-LABEL: lshr_i128:

	; GP32: lw $25, %call16(__lshrti3)($gp)			; GP32: lw $25, %call16(__lshrti3)($gp)

	; M3: sll $[[T0:[0-9]+]], $7, 0			; M3: sll $[[T0:[0-9]+]], $7, 0
	; M3: dsrlv $[[T1:[0-9]+]], $4, $7			; M3: dsrlv $[[T1:[0-9]+]], $4, $7
	; M3: andi $[[T2:[0-9]+]], $[[T0]], 64			; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
	; M3: bnez $[[T3:[0-9]+]], [[BB0:\.LBB[0-9_]+]]			; M3: beqz $[[T3:[0-9]+]], .[[BB0:LBB[0-9_]+]]
	; M3: move $3, $[[T1]]			; M3: move $3, $[[T1]]
				; M3: beqz $[[T3]], .[[BB1:LBB[0-9_]+]]
				; M3: daddiu $2, $zero, 0
				; M3: .[[EXIT:LBB[0-9_]+]]:
				; M3: jr $ra
				; M3: nop
				; M3: .[[BB0]]:
	; M3: dsrlv $[[T4:[0-9]+]], $5, $7			; M3: dsrlv $[[T4:[0-9]+]], $5, $7
	; M3: dsll $[[T5:[0-9]+]], $4, 1			; M3: dsll $[[T5:[0-9]+]], $4, 1
	; M3: not $[[T6:[0-9]+]], $[[T0]]			; M3: not $[[T6:[0-9]+]], $[[T0]]
	; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]			; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
	; M3: or $3, $[[T7]], $[[T4]]			; M3: or $3, $[[T7]], $[[T4]]
	; M3: [[BB0]]:			; M3: bnez $[[T3]], .[[EXIT]]
	; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]]
	; M3: daddiu $2, $zero, 0			; M3: daddiu $2, $zero, 0
	; M3: move $2, $[[T1]]
	; M3: [[BB1]]:			; M3: [[BB1]]:
	; M3: jr $ra			; M3: jr $ra
	; M3: nop			; M3: move $2, $[[T1]]

	; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7			; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7
	; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1			; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1
	; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0			; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0
	; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]			; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]
	; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T1]], $[[T3]]			; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
	; GP64-NOT-R6: or $3, $[[T4]], $[[T0]]			; GP64-NOT-R6: or $3, $[[T4]], $[[T0]]
	; GP64-NOT-R6: dsrlv $2, $4, $7			; GP64-NOT-R6: dsrlv $2, $4, $7
	Show All 25 Lines

test/CodeGen/Mips/llvm-ir/shl.ll

	Show First 20 Lines • Show All 91 Lines • ▼ Show 20 Lines
	}			}

	define signext i64 @shl_i64(i64 signext %a, i64 signext %b) {			define signext i64 @shl_i64(i64 signext %a, i64 signext %b) {
	entry:			entry:
	; ALL-LABEL: shl_i64:			; ALL-LABEL: shl_i64:

	; M2: sllv $[[T0:[0-9]+]], $5, $7			; M2: sllv $[[T0:[0-9]+]], $5, $7
	; M2: andi $[[T1:[0-9]+]], $7, 32			; M2: andi $[[T1:[0-9]+]], $7, 32
	; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]]			; M2: beqz $[[T1]], $[[BB0:BB[0-9_]+]]
	; M2: move $2, $[[T0]]			; M2: move $2, $[[T0]]
				; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
				; M2: addiu $3, $zero, 0
				; M2: $[[EXIT:BB[0-9_]+]]:
				; M2: jr $ra
				; M2: nop
				; M2: $[[BB0]]:
	; M2: sllv $[[T2:[0-9]+]], $4, $7			; M2: sllv $[[T2:[0-9]+]], $4, $7
	; M2: not $[[T3:[0-9]+]], $7			; M2: not $[[T3:[0-9]+]], $7
	; M2: srl $[[T4:[0-9]+]], $5, 1			; M2: srl $[[T4:[0-9]+]], $5, 1
	; M2: srlv $[[T5:[0-9]+]], $[[T4]], $[[T3]]			; M2: srlv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
	; M2: or $2, $[[T2]], $[[T3]]			; M2: or $2, $[[T2]], $[[T3]]
	; M2: $[[BB0]]:			; M2: bnez $[[T1]], $[[EXIT]]
	; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
	; M2: addiu $3, $zero, 0			; M2: addiu $3, $zero, 0
	; M2: move $3, $[[T0]]
	; M2: $[[BB1]]:			; M2: $[[BB1]]:
	; M2: jr $ra			; M2: jr $ra
	; M2: nop			; M2: move $3, $[[T0]]

	; 32R1-R5: sllv $[[T0:[0-9]+]], $4, $7			; 32R1-R5: sllv $[[T0:[0-9]+]], $4, $7
	; 32R1-R5: not $[[T1:[0-9]+]], $7			; 32R1-R5: not $[[T1:[0-9]+]], $7
	; 32R1-R5: srl $[[T2:[0-9]+]], $5, 1			; 32R1-R5: srl $[[T2:[0-9]+]], $5, 1
	; 32R1-R5: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]]			; 32R1-R5: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
	; 32R1-R5: or $2, $[[T0]], $[[T3]]			; 32R1-R5: or $2, $[[T0]], $[[T3]]
	; 32R1-R5: sllv $[[T4:[0-9]+]], $5, $7			; 32R1-R5: sllv $[[T4:[0-9]+]], $5, $7
	; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32			; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32
	▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines
	entry:			entry:
	; ALL-LABEL: shl_i128:			; ALL-LABEL: shl_i128:

	; GP32: lw $25, %call16(__ashlti3)($gp)			; GP32: lw $25, %call16(__ashlti3)($gp)

	; M3: sll $[[T0:[0-9]+]], $7, 0			; M3: sll $[[T0:[0-9]+]], $7, 0
	; M3: dsllv $[[T1:[0-9]+]], $5, $7			; M3: dsllv $[[T1:[0-9]+]], $5, $7
	; M3: andi $[[T2:[0-9]+]], $[[T0]], 64			; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
	; M3: bnez $[[T3:[0-9]+]], [[BB0:\.LBB[0-9_]+]]			; M3: beqz $[[T3:[0-9]+]], .[[BB0:LBB[0-9_]+]]
	; M3: move $2, $[[T1]]			; M3: move $2, $[[T1]]
				; M3: beqz $[[T3]], .[[BB1:LBB[0-9_]+]]
				; M3: daddiu $3, $zero, 0
				; M3: .[[EXIT:LBB[0-9_]+]]:
				; M3: jr $ra
				; M3: nop
				; M3: .[[BB0]]:
	; M3: dsllv $[[T4:[0-9]+]], $4, $7			; M3: dsllv $[[T4:[0-9]+]], $4, $7
	; M3: dsrl $[[T5:[0-9]+]], $5, 1			; M3: dsrl $[[T5:[0-9]+]], $5, 1
	; M3: not $[[T6:[0-9]+]], $[[T0]]			; M3: not $[[T6:[0-9]+]], $[[T0]]
	; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]]			; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
	; M3: or $2, $[[T4]], $[[T7]]			; M3: or $2, $[[T4]], $[[T7]]
	; M3: [[BB0]]:			; M3: bnez $[[T3]], .[[EXIT]]
	; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]]
	; M3: daddiu $3, $zero, 0			; M3: daddiu $3, $zero, 0
	; M3: move $3, $[[T1]]
	; M3: [[BB1]]:			; M3: [[BB1]]:
	; M3: jr $ra			; M3: jr $ra
	; M3: nop			; M3: move $3, $[[T1]]

	; GP64-NOT-R6: dsllv $[[T0:[0-9]+]], $4, $7			; GP64-NOT-R6: dsllv $[[T0:[0-9]+]], $4, $7
	; GP64-NOT-R6: dsrl $[[T1:[0-9]+]], $5, 1			; GP64-NOT-R6: dsrl $[[T1:[0-9]+]], $5, 1
	; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0			; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0
	; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]			; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]
	; GP64-NOT-R6: dsrlv $[[T4:[0-9]+]], $[[T1]], $[[T3]]			; GP64-NOT-R6: dsrlv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
	; GP64-NOT-R6: or $2, $[[T0]], $[[T4]]			; GP64-NOT-R6: or $2, $[[T0]], $[[T4]]
	; GP64-NOT-R6: dsllv $3, $5, $7			; GP64-NOT-R6: dsllv $3, $5, $7
	Show All 25 Lines

test/CodeGen/Mips/longbranch.ll

	Show First 20 Lines • Show All 78 Lines • ▼ Show 20 Lines
	; In MIPS32R6 JR is an alias to JALR with $rd=0. As everything else remains the			; In MIPS32R6 JR is an alias to JALR with $rd=0. As everything else remains the
	; same with the O32 prefix, we use -asm-show-inst in order to make sure that			; same with the O32 prefix, we use -asm-show-inst in order to make sure that
	; the opcode of the MachineInst is a JALR.			; the opcode of the MachineInst is a JALR.
	; O32-R6: JALR			; O32-R6: JALR

	; Check the MIPS64 version.			; Check the MIPS64 version.

	; N64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(test1)))			; N64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(test1)))
	; N64: bnez $4, [[BB0:\.LBB[0-9_]+]]			; N64: beqz $4, .[[EXIT:LBB[0-9_]+]]
	; N64: daddu $[[R1:[0-9]+]], $[[R0]], $25			; N64: daddu $[[R1:[0-9]+]], $[[R0]], $25

	; Check for long branch expansion:			; Check for long branch expansion:
	; N64: daddiu $sp, $sp, -16			; N64: daddiu $sp, $sp, -16
	; N64-NEXT: sd $ra, 0($sp)			; N64-NEXT: sd $ra, 0($sp)
	; N64-NEXT: daddiu $1, $zero, %hi([[BB2:\.LBB[0-9_]+]]-[[BB1:\.LBB[0-9_]+]])			; N64-NEXT: daddiu $1, $zero, %hi([[BB2:\.LBB[0-9_]+]]-[[BB1:\.LBB[0-9_]+]])
	; N64-NEXT: dsll $1, $1, 16			; N64-NEXT: dsll $1, $1, 16
	; N64-NEXT: bal [[BB1]]			; N64-NEXT: bal [[BB1]]
	; N64-NEXT: daddiu $1, $1, %lo([[BB2]]-[[BB1]])			; N64-NEXT: daddiu $1, $1, %lo([[BB2]]-[[BB1]])
	; N64-NEXT: [[BB1]]:			; N64-NEXT: [[BB1]]:
	; N64-NEXT: daddu $1, $ra, $1			; N64-NEXT: daddu $1, $ra, $1
	; N64-NEXT: ld $ra, 0($sp)			; N64-NEXT: ld $ra, 0($sp)
	; N64-NEXT: jr $1			; N64-NEXT: jr $1
	; N64-NEXT: daddiu $sp, $sp, 16			; N64-NEXT: daddiu $sp, $sp, 16

	; N64: [[BB0]]:			; N64: [[EXIT]]:
				; N64: jr $ra
				; N64: nop
				; N64: [[BB2]]:
	; N64: daddiu $[[GP:[0-9]+]], $[[R1]], %lo(%neg(%gp_rel(test1)))			; N64: daddiu $[[GP:[0-9]+]], $[[R1]], %lo(%neg(%gp_rel(test1)))
	; N64: ld $[[R2:[0-9]+]], %got_disp(x)($[[GP]])			; N64: ld $[[R2:[0-9]+]], %got_disp(x)($[[GP]])
	; N64: addiu $[[R3:[0-9]+]], $zero, 1			; N64: addiu $[[R3:[0-9]+]], $zero, 1
	; N64: sw $[[R3]], 0($[[R2]])
	; N64: [[BB2]]:
	; N64: jr $ra			; N64: jr $ra
	; N64: nop			; N64: sw $[[R3]], 0($[[R2]])

	; In MIPS64R6 JR is an alias to JALR with $rd=0. As everything else remains the			; In MIPS64R6 JR is an alias to JALR with $rd=0. As everything else remains the
	; same with the N64 prefix, we use -asm-show-inst in order to make sure that			; same with the N64 prefix, we use -asm-show-inst in order to make sure that
	; the opcode of the MachineInst is a JALR.			; the opcode of the MachineInst is a JALR.
	; N64-R6: JALR64			; N64-R6: JALR64


	; Check the microMIPS version.			; Check the microMIPS version.
	▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/bdzlr.ll

Show First 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	for.body.for.body_crit_edge: ; preds = %for.body
%.pre = load %struct.lua_TValue.17.692, %struct.lua_TValue.17.692* undef, align 8		%.pre = load %struct.lua_TValue.17.692, %struct.lua_TValue.17.692* undef, align 8
br label %for.body		br label %for.body

for.end: ; preds = %for.body, %if.end, %entry		for.end: ; preds = %for.body, %if.end, %entry
ret void		ret void

; CHECK: @lua_xmove		; CHECK: @lua_xmove
; CHECK: bnelr		; CHECK: bnelr
; CHECK: bnelr		; CHECK: beq
		; CHECK: blr
; CHECK: bdzlr		; CHECK: bdzlr
; CHECK-NOT: blr		; CHECK-NOT: blr

; CHECK-CRB: @lua_xmove		; CHECK-CRB: @lua_xmove
; CHECK-CRB: bclr 12,		; CHECK-CRB: bclr 12,
; CHECK-CRB: bclr 12,		; CHECK-CRB: bc 4,
		; CHECK-CRB: blr
; CHECK-CRB: bdzlr		; CHECK-CRB: bdzlr
; CHECK-CRB-NOT: blr		; CHECK-CRB-NOT: blr
}		}

attributes #0 = { nounwind }		attributes #0 = { nounwind }

test/CodeGen/PowerPC/tail-dup-layout.ll

; RUN: llc -outline-optional-branches -O2 < %s \| FileCheck %s		; RUN: llc -O2 < %s \| FileCheck %s
davidxl (inline comment; unsubmitted, not done): This test case could use some simplification. Why not just make a simple function call in the optional branches? The test blocks can also be simplified, for instance by testing input parameters.
target datalayout = "e-m:e-i64:64-n32:64"		target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"		target triple = "powerpc64le-grtev4-linux-gnu"

; Intended layout:		; Intended layout:
; The outlining flag produces the layout		; The chain-based outlining produces the layout
; test1		; test1
; test2		; test2
; test3		; test3
; test4		; test4
; exit
; optional1		; optional1
; optional2		; optional2
; optional3		; optional3
; optional4		; optional4
		; exit
; Tail duplication puts test n+1 at the end of optional n		; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches		; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional 2.		; to test3 (at the top) or falls through to optional 2.
; The CHECK statements check for the whole string of tests and exit block,		; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of		; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.		; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: f:		;CHECK-LABEL: straight_test:
; test1 may have been merged with entry		; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3		;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1		;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]]		;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2		;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30		;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]]		;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3		;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29		;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]		;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4		;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28		;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]]		;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit		;CHECK-NEXT: b [[EXITLABEL:[._0-9A-Za-z]+]]
;CHECK: blr
;CHECK-NEXT: [[OPT1LABEL]]		;CHECK-NEXT: [[OPT1LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: beq 0, [[TEST3LABEL]]		;CHECK-NEXT: beq 0, [[TEST3LABEL]]
;CHECK-NEXT: [[OPT2LABEL]]		;CHECK-NEXT: [[OPT2LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: beq 0, [[TEST4LABEL]]		;CHECK-NEXT: beq 0, [[TEST4LABEL]]
;CHECK-NEXT: [[OPT3LABEL]]		;CHECK-NEXT: [[OPT3LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: beq 0, [[EXITLABEL]]		;CHECK-NEXT: beq 0, [[EXITLABEL]]
;CHECK-NEXT: [[OPT4LABEL]]		;CHECK-NEXT: [[OPT4LABEL]]
;CHECK: b [[EXITLABEL]]		;CHECK: [[EXITLABEL]]: # %exit
		;CHECK: blr

define void @f(i32 %tag) {		define void @straight_test(i32 %tag) {
entry:		entry:
br label %test1		br label %test1
test1:		test1:
%tagbit1 = and i32 %tag, 1		%tagbit1 = and i32 %tag, 1
%tagbit1eq0 = icmp eq i32 %tagbit1, 0		%tagbit1eq0 = icmp eq i32 %tagbit1, 0
br i1 %tagbit1eq0, label %test2, label %optional1		br i1 %tagbit1eq0, label %test2, label %optional1
optional1:		optional1:
call void @a()		call void @a()
Show All 30 Lines	optional4:
call void @d()		call void @d()
call void @d()		call void @d()
call void @d()		call void @d()
br label %exit		br label %exit
exit:		exit:
ret void		ret void
}		}

		; Intended layout:
		; The chain-based outlining produces the layout
		; entry
		; --- Begin loop ---
		; for.latch
		; for.check
		; test1
		; test2
		; test3
		; test4
		; optional1
		; optional2
		; optional3
		; optional4
		; --- End loop ---
		; exit
		; The CHECK statements check for the whole string of tests and exit block,
		; and then check that the correct test has been duplicated into the end of
		; the optional blocks and that the optional blocks are in the correct order.
		;CHECK-LABEL: loop_test:
		;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
		;CHECK: [[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
		;CHECK: addi
		;CHECK: [[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
		;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
		;CHECK: # %test1
		;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
		;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]]
		;CHECK-NEXT: # %test2
		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
		;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]]
		;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
		;CHECK-NEXT: bne 0, [[OPT3LABEL:[._0-9A-Za-z]+]]
		;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4\|optional3)}}
		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
		;CHECK-NEXT: beq 0, [[LATCHLABEL]]
		;CHECK-NEXT: b [[OPT4LABEL:[._0-9A-Za-z]+]]
		;CHECK: [[OPT1LABEL]]
		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
		;CHECK-NEXT: beq 0, [[TEST3LABEL]]
		;CHECK-NEXT: [[OPT2LABEL]]
		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
		;CHECK-NEXT: beq 0, [[TEST4LABEL]]
		;CHECK-NEXT: [[OPT3LABEL]]
		;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
		;CHECK-NEXT: beq 0, [[LATCHLABEL]]
		;CHECK-NEXT: [[OPT4LABEL]]
		;CHECK: b [[LATCHLABEL]]
		define void @loop_test(i32* %tags, i32 %count) {
		entry:
		br label %for.check
		for.check:
		%count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
		%done.count = icmp ugt i32 %count.loop, 0
		%tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
		%tag = load i32, i32* %tag_ptr
		%done.tag = icmp eq i32 %tag, 0
		%done = and i1 %done.count, %done.tag
		br i1 %done, label %test1, label %exit
		test1:
		%tagbit1 = and i32 %tag, 1
		%tagbit1eq0 = icmp eq i32 %tagbit1, 0
		br i1 %tagbit1eq0, label %test2, label %optional1
		optional1:
		call void @a()
		call void @a()
		call void @a()
		call void @a()
		br label %test2
		test2:
		%tagbit2 = and i32 %tag, 2
		%tagbit2eq0 = icmp eq i32 %tagbit2, 0
		br i1 %tagbit2eq0, label %test3, label %optional2
		optional2:
		call void @b()
		call void @b()
		call void @b()
		call void @b()
		br label %test3
		test3:
		%tagbit3 = and i32 %tag, 4
		%tagbit3eq0 = icmp eq i32 %tagbit3, 0
		br i1 %tagbit3eq0, label %test4, label %optional3
		optional3:
		call void @c()
		call void @c()
		call void @c()
		call void @c()
		br label %test4
		test4:
		%tagbit4 = and i32 %tag, 8
		%tagbit4eq0 = icmp eq i32 %tagbit4, 0
		br i1 %tagbit4eq0, label %for.latch, label %optional4
		optional4:
		call void @d()
		call void @d()
		call void @d()
		call void @d()
		br label %for.latch
		for.latch:
		%count.sub = sub i32 %count.loop, 1
		br label %for.check
		exit:
		ret void
		}

declare void @a()		declare void @a()
declare void @b()		declare void @b()
declare void @c()		declare void @c()
declare void @d()		declare void @d()

test/CodeGen/SPARC/sjlj.ll

	Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
	; CHECK: or %i1, %lo(.LBB1_2), %i1			; CHECK: or %i1, %lo(.LBB1_2), %i1
	; CHECK: st %i1, [%i0+4]			; CHECK: st %i1, [%i0+4]
	; CHECK: st %sp, [%i0+8]			; CHECK: st %sp, [%i0+8]
	; CHECK: bn .LBB1_2			; CHECK: bn .LBB1_2
	; CHECK: st %i7, [%i0+12]			; CHECK: st %i7, [%i0+12]
	; CHECK: ba .LBB1_1			; CHECK: ba .LBB1_1
	; CHECK: nop			; CHECK: nop
	; CHECK:.LBB1_1: ! %entry			; CHECK:.LBB1_1: ! %entry
	; CHECK: ba .LBB1_3
	; CHECK: mov %g0, %i0			; CHECK: mov %g0, %i0
				; CHECK: cmp %i0, 0
				; CHECK: bne .LBB1_4
				; CHECK: ba .LBB1_5
	; CHECK:.LBB1_2: ! Block address taken			; CHECK:.LBB1_2: ! Block address taken
	; CHECK: mov 1, %i0			; CHECK: mov 1, %i0
	; CHECK:.LBB1_3: ! %entry
	; CHECK: cmp %i0, 0
	; CHECK: be .LBB1_5			; CHECK: be .LBB1_5
	; CHECK: nop			; CHECK:.LBB1_4:
				; CHECK: ba .LBB1_6
	}			}
	declare i8* @llvm.frameaddress(i32) #2			declare i8* @llvm.frameaddress(i32) #2

	declare i8* @llvm.stacksave() #3			declare i8* @llvm.stacksave() #3

	declare i32 @llvm.eh.sjlj.setjmp(i8*) #3			declare i32 @llvm.eh.sjlj.setjmp(i8*) #3

	attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }			attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
	attributes #1 = { noreturn nounwind }			attributes #1 = { noreturn nounwind }
	attributes #2 = { nounwind readnone }			attributes #2 = { nounwind readnone }
	attributes #3 = { nounwind }			attributes #3 = { nounwind }

test/CodeGen/SystemZ/tdc-06.ll

	Show All 20 Lines
	; CHECK: cdbr %f0, %f0			; CHECK: cdbr %f0, %f0
	; CHECK: jo [[RET]]			; CHECK: jo [[RET]]
	%testnan = fcmp uno double %x, 0.000000e+00			%testnan = fcmp uno double %x, 0.000000e+00
	br i1 %testnan, label %ret, label %nonzeroord, !prof !1			br i1 %testnan, label %ret, label %nonzeroord, !prof !1

	nonzeroord:			nonzeroord:
	; CHECK: lhi %r2, 2			; CHECK: lhi %r2, 2
	; CHECK: tcdb %f0, 48			; CHECK: tcdb %f0, 48
	; CHECK: jl [[RET]]			; CHECK: je [[FINITE:.]]
	%abs = tail call double @llvm.fabs.f64(double %x)			%abs = tail call double @llvm.fabs.f64(double %x)
	%testinf = fcmp oeq double %abs, 0x7FF0000000000000			%testinf = fcmp oeq double %abs, 0x7FF0000000000000
	br i1 %testinf, label %ret, label %finite, !prof !1			br i1 %testinf, label %ret, label %finite, !prof !1

				ret:
				; CHECK: [[RET]]:
				; CHECK: br %r14
				%res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ]
				ret i32 %res

	finite:			finite:
	; CHECK: lhi %r2, 3			; CHECK: lhi %r2, 3
	; CHECK: tcdb %f0, 831			; CHECK: tcdb %f0, 831
	; CHECK: blr %r14			; CHECK: blr %r14
	; CHECK: lhi %r2, 4			; CHECK: lhi %r2, 4
				; CHECK: br %r14
	%testnormal = fcmp uge double %abs, 0x10000000000000			%testnormal = fcmp uge double %abs, 0x10000000000000
	%finres = select i1 %testnormal, i32 3, i32 4			%finres = select i1 %testnormal, i32 3, i32 4
	br label %ret			br label %ret

	ret:
	; CHECK: [[RET]]:
	; CHECK: br %r14
	%res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ]
	ret i32 %res
	}			}

	!1 = !{!"branch_weights", i32 1, i32 1}			!1 = !{!"branch_weights", i32 1, i32 1}

test/CodeGen/Thumb/thumb-shrink-wrapping.ll

	; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \			; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
	; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T			; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T
	; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \			; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
	; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T			; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T
	; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \			; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
	; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T			; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T
	; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \			; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
	; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T			; RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T

	;			;
	; Note: Lots of tests use inline asm instead of regular calls.			; Note: Lots of tests use inline asm instead of regular calls.
	; This allows to have a better control on what the allocation will do.			; This allows to have a better control on what the allocation will do.
	; Otherwise, we may have spill right in the entry block, defeating			; Otherwise, we may have spill right in the entry block, defeating
	; shrink-wrapping. Moreover, some of the inline asm statements (nop)			; shrink-wrapping. Moreover, some of the inline asm statements (nop)
	; are here to ensure that the related paths do not end up as critical			; are here to ensure that the related paths do not end up as critical
	; edges.			; edges.
	; Also disable the late if-converter as it makes harder to reason on			; Also disable the late if-converter as it makes harder to reason on
	; the diffs.			; the diffs.
				; Disable tail-duplication during placement, as v4t vs v5t get different
				; results due to branches not being analyzable under v5

	; Initial motivating example: Simple diamond with a call just on one side.			; Initial motivating example: Simple diamond with a call just on one side.
	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; No prologue needed.			; No prologue needed.
	; ENABLE: cmp r0, r1			; ENABLE: cmp r0, r1
	; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]			; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
	▲ Show 20 Lines • Show All 665 Lines • Show Last 20 Lines

test/CodeGen/Thumb2/cbnz.ll

Show All 20 Lines	t:
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
; CHECK: cbnz		; CHECK: cbz
%q = icmp eq i32 %y, 0		%q = icmp eq i32 %y, 0
br i1 %q, label %t2, label %f		br i1 %q, label %t2, label %f

t2:		t2:
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
call void @x()		call void @x()
Show All 17 Lines

test/CodeGen/Thumb2/ifcvt-compare.ll

	; RUN: llc -mtriple=thumbv7-unknown-linux %s -o - \| FileCheck %s			; RUN: llc -mtriple=thumbv7-unknown-linux %s -o - \| FileCheck %s

	declare void @x()			declare void @x()

	define void @f0(i32 %x) optsize {			define void @f0(i32 %x) optsize {
	; CHECK-LABEL: f0:			; CHECK-LABEL: f0:
	; CHECK: cbnz			; CHECK: cbz
	%p = icmp eq i32 %x, 0			%p = icmp eq i32 %x, 0
	br i1 %p, label %t, label %f			br i1 %p, label %t, label %f

	t:			t:
	call void @x()			call void @x()
	br label %f			br label %f

	f:			f:
	Show All 34 Lines

test/CodeGen/WebAssembly/mem-intrinsics.ll

	; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0\| FileCheck %s			; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 \| FileCheck %s

	; Test memcpy, memmove, and memset intrinsics.			; Test memcpy, memmove, and memset intrinsics.

	target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"			target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
	target triple = "wasm32-unknown-unknown"			target triple = "wasm32-unknown-unknown"

	declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)			declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
	declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)			declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
	▲ Show 20 Lines • Show All 131 Lines • Show Last 20 Lines

test/CodeGen/X86/2012-08-17-legalizer-crash.ll

	Show All 20 Lines
	if.then: ; preds = %entry			if.then: ; preds = %entry
	store i576 %srcval2, i576* %1, align 8			store i576 %srcval2, i576* %1, align 8
	br label %if.end			br label %if.end

	if.end: ; preds = %if.then, %entry			if.end: ; preds = %if.then, %entry
	ret void			ret void

	; CHECK-LABEL: fn1:			; CHECK-LABEL: fn1:
	; CHECK: jb			; CHECK: jae
	}			}

test/CodeGen/X86/avx-splat.ll

	Show First 20 Lines • Show All 56 Lines • ▼ Show 20 Lines
	; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>			; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
	;			;
	define <8 x float> @funcE() nounwind {			define <8 x float> @funcE() nounwind {
	; CHECK-LABEL: funcE:			; CHECK-LABEL: funcE:
	; CHECK: ## BB#0: ## %for_exit499			; CHECK: ## BB#0: ## %for_exit499
	; CHECK-NEXT: xorl %eax, %eax			; CHECK-NEXT: xorl %eax, %eax
	; CHECK-NEXT: ## implicit-def: %YMM0			; CHECK-NEXT: ## implicit-def: %YMM0
	; CHECK-NEXT: testb %al, %al			; CHECK-NEXT: testb %al, %al
	; CHECK-NEXT: jne LBB4_2			; CHECK-NEXT: je LBB4_1
	; CHECK-NEXT: ## BB#1: ## %load.i1247			; CHECK-NEXT: ## BB#2: ## %__load_and_broadcast_32.exit1249
				; CHECK-NEXT: retq
				; CHECK-NEXT: LBB4_1: ## %load.i1247
	; CHECK-NEXT: pushq %rbp			; CHECK-NEXT: pushq %rbp
	; CHECK-NEXT: movq %rsp, %rbp			; CHECK-NEXT: movq %rsp, %rbp
	; CHECK-NEXT: andq $-32, %rsp			; CHECK-NEXT: andq $-32, %rsp
	; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520			; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520
	; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0			; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
	; CHECK-NEXT: movq %rbp, %rsp			; CHECK-NEXT: movq %rbp, %rsp
	; CHECK-NEXT: popq %rbp			; CHECK-NEXT: popq %rbp
	; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	allocas:			allocas:
	%udx495 = alloca [18 x [18 x float]], align 32			%udx495 = alloca [18 x [18 x float]], align 32
	br label %for_test505.preheader			br label %for_test505.preheader

	for_test505.preheader: ; preds = %for_test505.preheader, %allocas			for_test505.preheader: ; preds = %for_test505.preheader, %allocas
	br i1 undef, label %for_exit499, label %for_test505.preheader			br i1 undef, label %for_exit499, label %for_test505.preheader

	▲ Show 20 Lines • Show All 91 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512-cmp.ll

	Show First 20 Lines • Show All 63 Lines • ▼ Show 20 Lines
	}			}

	define float @test5(float %p) #0 {			define float @test5(float %p) #0 {
	; ALL-LABEL: test5:			; ALL-LABEL: test5:
	; ALL: ## BB#0: ## %entry			; ALL: ## BB#0: ## %entry
	; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1			; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
	; ALL-NEXT: vucomiss %xmm1, %xmm0			; ALL-NEXT: vucomiss %xmm1, %xmm0
	; ALL-NEXT: jne LBB3_1			; ALL-NEXT: jne LBB3_1
	; ALL-NEXT: jnp LBB3_2			; ALL-NEXT: jp LBB3_1
				; ALL-NEXT: ## BB#2: ## %return
				; ALL-NEXT: retq
	; ALL-NEXT: LBB3_1: ## %if.end			; ALL-NEXT: LBB3_1: ## %if.end
	; ALL-NEXT: seta %al			; ALL-NEXT: seta %al
	; ALL-NEXT: movzbl %al, %eax			; ALL-NEXT: movzbl %al, %eax
	; ALL-NEXT: leaq {{.*}}(%rip), %rcx			; ALL-NEXT: leaq {{.*}}(%rip), %rcx
	; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
	; ALL-NEXT: LBB3_2: ## %return
	; ALL-NEXT: retq			; ALL-NEXT: retq
	entry:			entry:
	%cmp = fcmp oeq float %p, 0.000000e+00			%cmp = fcmp oeq float %p, 0.000000e+00
	br i1 %cmp, label %return, label %if.end			br i1 %cmp, label %return, label %if.end

	if.end: ; preds = %entry			if.end: ; preds = %entry
	%cmp1 = fcmp ogt float %p, 0.000000e+00			%cmp1 = fcmp ogt float %p, 0.000000e+00
	%cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00			%cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
	▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

test/CodeGen/X86/block-placement.ll

Show First 20 Lines • Show All 308 Lines • ▼ Show 20 Lines

exit:		exit:
ret i32 %sum		ret i32 %sum
}		}

define void @unnatural_cfg1() {		define void @unnatural_cfg1() {
; Test that we can handle a loop with an inner unnatural loop at the end of		; Test that we can handle a loop with an inner unnatural loop at the end of
; a function. This is a gross CFG reduced out of the single source GCC.		; a function. This is a gross CFG reduced out of the single source GCC.
; CHECK: unnatural_cfg1		; CHECK-LABEL: unnatural_cfg1
; CHECK: %entry		; CHECK: %entry
; CHECK: %loop.body1		; CHECK: %loop.body1
; CHECK: %loop.body2		; CHECK: %loop.body2
; CHECK: %loop.body3		; CHECK: %loop.body3

entry:		entry:
br label %loop.header		br label %loop.header

Show All 21 Lines	loop.body5:
%ptr2 = load i32, i32* undef, align 4		%ptr2 = load i32, i32* undef, align 4
br label %loop.body3		br label %loop.body3
}		}

define void @unnatural_cfg2() {		define void @unnatural_cfg2() {
; Test that we can handle a loop with a nested natural loop and an unnatural		; Test that we can handle a loop with a nested natural loop and an unnatural
; loop. This was reduced from a crash on block placement when run over		; loop. This was reduced from a crash on block placement when run over
; single-source GCC.		; single-source GCC.
; CHECK: unnatural_cfg2		; The tail-duplication outlining algorithm places
		; %loop.body3 and %loop.inner1.begin out-of-line at the end of the loop,
		; because %loop.body4 is unavoidable within the loop and short,
		; and %loop.inner1.begin has an alternate fallthrough of %loop.body3
		; CHECK-LABEL: unnatural_cfg2
; CHECK: %entry		; CHECK: %entry
; CHECK: %loop.body1		; CHECK: %loop.body1
; CHECK: %loop.body2		; CHECK: %loop.body2
		; CHECK: %loop.body4
		; CHECK: %loop.inner2.begin
		; CHECK: %loop.inner2.begin
		; The loop.inner2.end block is folded
; CHECK: %loop.body3		; CHECK: %loop.body3
; CHECK: %loop.inner1.begin		; CHECK: %loop.inner1.begin
; The end block is folded with %loop.body3...		; The end block is folded with %loop.body3...
; CHECK-NOT: %loop.inner1.end		; CHECK-NOT: %loop.inner1.end
; CHECK: %loop.body4
; CHECK: %loop.inner2.begin
; The loop.inner2.end block is folded
; CHECK: %loop.header		; CHECK: %loop.header
; CHECK: %bail		; CHECK: %bail

entry:		entry:
br label %loop.header		br label %loop.header

loop.header:		loop.header:
%comp0 = icmp eq i32* undef, null		%comp0 = icmp eq i32* undef, null
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines
declare i32 @__gxx_personality_v0(...)		declare i32 @__gxx_personality_v0(...)

define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {		define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; Sometimes the landing pad ends up as the first successor of an invoke block.		; Sometimes the landing pad ends up as the first successor of an invoke block.
; When this happens, a strange result used to fall out of updateTerminators: we		; When this happens, a strange result used to fall out of updateTerminators: we
; didn't correctly locate the fallthrough successor, assuming blindly that the		; didn't correctly locate the fallthrough successor, assuming blindly that the
; first one was the fallthrough successor. As a result, we would add an		; first one was the fallthrough successor. As a result, we would add an
; erroneous jump to the landing pad thinking that was the default successor.		; erroneous jump to the landing pad thinking that was the default successor.
; CHECK: test_eh_lpad_successor		; CHECK-LABEL: test_eh_lpad_successor
; CHECK: %entry		; CHECK: %entry
; CHECK-NOT: jmp		; CHECK-NOT: jmp
; CHECK: %loop		; CHECK: %loop

entry:		entry:
invoke i32 @f() to label %preheader unwind label %lpad		invoke i32 @f() to label %preheader unwind label %lpad

preheader:		preheader:
Show All 11 Lines
declare void @fake_throw() noreturn		declare void @fake_throw() noreturn

define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {		define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; For blocks containing a 'throw' (or similar functionality), we have		; For blocks containing a 'throw' (or similar functionality), we have
; a no-return invoke. In this case, only EH successors will exist, and		; a no-return invoke. In this case, only EH successors will exist, and
; fallthrough simply won't occur. Make sure we don't crash trying to update		; fallthrough simply won't occur. Make sure we don't crash trying to update
; terminators for such constructs.		; terminators for such constructs.
;		;
; CHECK: test_eh_throw		; CHECK-LABEL: test_eh_throw
; CHECK: %entry		; CHECK: %entry
; CHECK: %cleanup		; CHECK: %cleanup

entry:		entry:
invoke void @fake_throw() to label %continue unwind label %cleanup		invoke void @fake_throw() to label %continue unwind label %cleanup

continue:		continue:
unreachable		unreachable

cleanup:		cleanup:
%0 = landingpad { i8*, i32 }		%0 = landingpad { i8*, i32 }
cleanup		cleanup
unreachable		unreachable
}		}

define void @test_unnatural_cfg_backwards_inner_loop() {		define void @test_unnatural_cfg_backwards_inner_loop() {
; Test that when we encounter an unnatural CFG structure after having formed		; Test that when we encounter an unnatural CFG structure after having formed
; a chain for an inner loop which happened to be laid out backwards we don't		; a chain for an inner loop which happened to be laid out backwards we don't
; attempt to merge onto the wrong end of the inner loop just because we find it		; attempt to merge onto the wrong end of the inner loop just because we find it
; first. This was reduced from a crasher in GCC's single source.		; first. This was reduced from a crasher in GCC's single source.
;		;
; CHECK: test_unnatural_cfg_backwards_inner_loop		; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry		; CHECK: %entry
; CHECK: %loop2b		; CHECK: %loop2b
; CHECK: %loop1		; CHECK: %loop1

entry:		entry:
br i1 undef, label %loop2a, label %body		br i1 undef, label %loop2a, label %body

body:		body:
Show All 23 Lines

define void @unanalyzable_branch_to_loop_header() {		define void @unanalyzable_branch_to_loop_header() {
; Ensure that we can handle unanalyzable branches into loop headers. We		; Ensure that we can handle unanalyzable branches into loop headers. We
; pre-form chains for unanalyzable branches, and will find the tail end of that		; pre-form chains for unanalyzable branches, and will find the tail end of that
; at the start of the loop. This function uses floating point comparison		; at the start of the loop. This function uses floating point comparison
; fallthrough because that happens to always produce unanalyzable branches on		; fallthrough because that happens to always produce unanalyzable branches on
; x86.		; x86.
;		;
; CHECK: unanalyzable_branch_to_loop_header		; CHECK-LABEL: unanalyzable_branch_to_loop_header
; CHECK: %entry		; CHECK: %entry
; CHECK: %loop		; CHECK: %loop
; CHECK: %exit		; CHECK: %exit

entry:		entry:
%cmp = fcmp une double 0.000000e+00, undef		%cmp = fcmp une double 0.000000e+00, undef
br i1 %cmp, label %loop, label %exit		br i1 %cmp, label %loop, label %exit

loop:		loop:
%cond = icmp eq i8 undef, 42		%cond = icmp eq i8 undef, 42
br i1 %cond, label %exit, label %loop		br i1 %cond, label %exit, label %loop

exit:		exit:
ret void		ret void
}		}

define void @unanalyzable_branch_to_best_succ(i1 %cond) {		define void @unanalyzable_branch_to_best_succ(i1 %cond) {
; Ensure that we can handle unanalyzable branches where the destination block		; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the optimal successor to merge.		; gets selected as the optimal successor to merge.
;		;
; This branch is now analyzable and hence the destination block becomes the		; This branch is now analyzable and hence the destination block becomes the
; hotter one. The right order is entry->bar->exit->foo.		; hotter one. The right order is entry->bar->exit->foo.
;		;
; CHECK: unanalyzable_branch_to_best_succ		; CHECK-LABEL: unanalyzable_branch_to_best_succ
; CHECK: %entry		; CHECK: %entry
; CHECK: %bar		; CHECK: %bar
; CHECK: %exit		; CHECK: %exit
; CHECK: %foo		; CHECK: %foo

entry:		entry:
; Bias this branch toward bar to ensure we form that chain.		; Bias this branch toward bar to ensure we form that chain.
br i1 %cond, label %bar, label %foo, !prof !1		br i1 %cond, label %bar, label %foo, !prof !1
Show All 9 Lines
exit:		exit:
ret void		ret void
}		}

define void @unanalyzable_branch_to_free_block(float %x) {		define void @unanalyzable_branch_to_free_block(float %x) {
; Ensure that we can handle unanalyzable branches where the destination block		; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the best free block in the CFG.		; gets selected as the best free block in the CFG.
;		;
; CHECK: unanalyzable_branch_to_free_block		; CHECK-LABEL: unanalyzable_branch_to_free_block
; CHECK: %entry		; CHECK: %entry
; CHECK: %a		; CHECK: %a
; CHECK: %b		; CHECK: %b
; CHECK: %c
; CHECK: %exit		; CHECK: %exit
		; CHECK: %c
		; CHECK: retl

entry:		entry:
br i1 undef, label %a, label %b		br i1 undef, label %a, label %b

a:		a:
call i32 @f()		call i32 @f()
br label %c		br label %c

b:		b:
%cmp = fcmp une float %x, undef		%cmp = fcmp une float %x, undef
br i1 %cmp, label %c, label %exit		br i1 %cmp, label %c, label %exit

c:		c:
call i32 @g()		call i32 @g()
br label %exit		br label %exit

exit:		exit:
ret void		ret void
}		}

define void @many_unanalyzable_branches() {		define void @many_unanalyzable_branches() {
; Ensure that we don't crash as we're building up many unanalyzable branches,		; Ensure that we don't crash as we're building up many unanalyzable branches,
; blocks, and loops.		; blocks, and loops.
;		;
; CHECK: many_unanalyzable_branches		; CHECK-LABEL: many_unanalyzable_branches
; CHECK: %entry		; CHECK: %entry
; CHECK: %exit		; CHECK: %exit

entry:		entry:
br label %0		br label %0

%val0 = load volatile float, float* undef		%val0 = load volatile float, float* undef
%cmp0 = fcmp une float %val0, undef		%cmp0 = fcmp une float %val0, undef
▲ Show 20 Lines • Show All 202 Lines • ▼ Show 20 Lines
; 1) Loop rotation needs to ensure that the desired exiting edge can be		; 1) Loop rotation needs to ensure that the desired exiting edge can be
; a fallthrough.		; a fallthrough.
; 2) The exiting edge from the loop which is rotated to be laid out at the		; 2) The exiting edge from the loop which is rotated to be laid out at the
; bottom of the loop needs to be exiting into the nearest enclosing loop (to		; bottom of the loop needs to be exiting into the nearest enclosing loop (to
; which there is an exit). Otherwise, we force that enclosing loop into		; which there is an exit). Otherwise, we force that enclosing loop into
; strange layouts that are significantly less efficient, often times making		; strange layouts that are significantly less efficient, often times making
; it discontiguous.		; it discontiguous.
;		;
; CHECK: @benchmark_heapsort		; CHECK-LABEL: @benchmark_heapsort
; CHECK: %entry		; CHECK: %entry
; First rotated loop top.		; First rotated loop top.
; CHECK: .p2align		; CHECK: .p2align
; CHECK: %while.end		; CHECK: %while.end
; %for.cond gets completely tail-duplicated away.		; %for.cond gets completely tail-duplicated away.
; CHECK: %if.then		; CHECK: %if.then
; CHECK: %if.else		; CHECK: %if.else
; CHECK: %if.end10		; CHECK: %if.end10
▲ Show 20 Lines • Show All 505 Lines • Show Last 20 Lines

test/CodeGen/X86/cmovcmov.ll

	Show First 20 Lines • Show All 186 Lines • ▼ Show 20 Lines
	; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0			; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
	; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]			; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
	; CMOV-NEXT: jp [[TBB]]			; CMOV-NEXT: jp [[TBB]]
	; CMOV-NEXT: xorps %xmm0, %xmm0			; CMOV-NEXT: xorps %xmm0, %xmm0
	; CMOV-NEXT: [[TBB]]:			; CMOV-NEXT: [[TBB]]:
	; CMOV-NEXT: retq			; CMOV-NEXT: retq

	; NOCMOV: jne			; NOCMOV: jne
	; NOCMOV-NEXT: jp			; NOCMOV-NEXT: jnp
	define float @test_zext_fcmp_une(float %a, float %b) #0 {			define float @test_zext_fcmp_une(float %a, float %b) #0 {
	entry:			entry:
	%cmp = fcmp une float %a, %b			%cmp = fcmp une float %a, %b
	%conv1 = zext i1 %cmp to i32			%conv1 = zext i1 %cmp to i32
	%conv2 = sitofp i32 %conv1 to float			%conv2 = sitofp i32 %conv1 to float
	ret float %conv2			ret float %conv2
	}			}

	; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:			; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
	; CMOV-NEXT: .long 1065353216			; CMOV-NEXT: .long 1065353216

	; CHECK-LABEL: test_zext_fcmp_oeq:			; CHECK-LABEL: test_zext_fcmp_oeq:
	; CMOV-NEXT: ucomiss %xmm1, %xmm0			; CMOV-NEXT: ucomiss %xmm1, %xmm0
	; CMOV-NEXT: xorps %xmm0, %xmm0			; CMOV-NEXT: xorps %xmm0, %xmm0
	; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]			; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
	; CMOV-NEXT: jp [[TBB]]			; CMOV-NEXT: jp [[TBB]]
	; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0			; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
	; CMOV-NEXT: [[TBB]]:			; CMOV-NEXT: [[TBB]]:
	; CMOV-NEXT: retq			; CMOV-NEXT: retq

	; NOCMOV: jne			; NOCMOV: jne
	; NOCMOV-NEXT: jp			; NOCMOV-NEXT: jnp
	define float @test_zext_fcmp_oeq(float %a, float %b) #0 {			define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
	entry:			entry:
	%cmp = fcmp oeq float %a, %b			%cmp = fcmp oeq float %a, %b
	%conv1 = zext i1 %cmp to i32			%conv1 = zext i1 %cmp to i32
	%conv2 = sitofp i32 %conv1 to float			%conv2 = sitofp i32 %conv1 to float
	ret float %conv2			ret float %conv2
	}			}

	▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines

test/CodeGen/X86/critical-edge-split-2.ll

	Show All 18 Lines

	cond.end.i: ; preds = %entry			cond.end.i: ; preds = %entry
	%call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0, %0* @g_2, i64 0, i32 1, i32 0) to %1), %1 @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ]			%call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0, %0* @g_2, i64 0, i32 1, i32 0) to %1), %1 @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ]
	ret i16 %call1			ret i16 %call1
	}			}

	; CHECK-LABEL: test1:			; CHECK-LABEL: test1:
	; CHECK: testb %dil, %dil			; CHECK: testb %dil, %dil
	; CHECK: jne LBB0_2			; CHECK: je LBB0_1
				; CHECK: retq
				; CHECK: LBB0_1:
	; CHECK: divl			; CHECK: divl
	; CHECK: LBB0_2:

test/CodeGen/X86/shrink-wrap-chkstk.ll

	Show First 20 Lines • Show All 56 Lines • ▼ Show 20 Lines
	false:			false:
	%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]			%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
	ret i32 %tmp.0			ret i32 %tmp.0
	}			}

	; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue			; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue
	; CHECK: movl %ecx, %eax			; CHECK: movl %ecx, %eax
	; CHECK: cmpl %edx, %eax			; CHECK: cmpl %edx, %eax
	; CHECK: jge LBB1_2			; CHECK: jl LBB1_1
				; CHECK: retl
				; CHECK: LBB1_1
	; CHECK: pushl %eax			; CHECK: pushl %eax
	; CHECK: movl $4092, %eax			; CHECK: movl $4092, %eax
	; CHECK: calll __chkstk			; CHECK: calll __chkstk
	; CHECK: movl 4092(%esp), %eax			; CHECK: movl 4092(%esp), %eax
	; CHECK: calll _doSomething			; CHECK: calll _doSomething
	; CHECK: LBB1_2:
	; CHECK: retl			; CHECK: retl

test/CodeGen/X86/twoaddr-coalesce-3.ll

Show All 13 Lines	entry:
br i1 %cmp3, label %for.body.lr.ph, label %for.end		br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph: ; preds = %entry		for.body.lr.ph: ; preds = %entry
%total.promoted = load i32, i32* @total, align 4		%total.promoted = load i32, i32* @total, align 4
br label %for.body		br label %for.body

; Check that only one mov will be generated in the kernel loop.		; Check that only one mov will be generated in the kernel loop.
; CHECK-LABEL: foo:		; CHECK-LABEL: foo:
; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body		; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}}
; CHECK-NOT: mov		; CHECK-NOT: mov
; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]]		; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]]
; CHECK-NOT: mov		; CHECK-NOT: mov
; CHECK: shrl $31, [[REG1]]		; CHECK: shrl $31, [[REG1]]
; CHECK-NOT: mov		; CHECK-NOT: mov
; CHECK: jl [[LOOP1]]		; CHECK: jl [[LOOP1]]
for.body: ; preds = %for.body.lr.ph, %for.body		for.body: ; preds = %for.body.lr.ph, %for.body
%add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]		%add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
Show All 20 Lines	entry:
br i1 %cmp3, label %for.body.lr.ph, label %for.end		br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph: ; preds = %entry		for.body.lr.ph: ; preds = %entry
%total.promoted = load i32, i32* @total, align 4		%total.promoted = load i32, i32* @total, align 4
br label %for.body		br label %for.body

; Check that only two mov will be generated in the kernel loop.		; Check that only two mov will be generated in the kernel loop.
; CHECK-LABEL: goo:		; CHECK-LABEL: goo:
; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body		; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}}
; CHECK-NOT: mov		; CHECK-NOT: mov
; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]]		; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]]
; CHECK-NOT: mov		; CHECK-NOT: mov
; CHECK: shrl $31, [[REG2]]		; CHECK: shrl $31, [[REG2]]
; CHECK-NOT: mov		; CHECK-NOT: mov
; CHECK: movl {{.*}}		; CHECK: movl {{.*}}
; CHECK: jl [[LOOP2]]		; CHECK: jl [[LOOP2]]
for.body: ; preds = %for.body.lr.ph, %for.body		for.body: ; preds = %for.body.lr.ph, %for.body
Show All 17 Lines

test/CodeGen/X86/x86-shrink-wrap-unwind.ll

	Show All 18 Lines
	; Prologue code.			; Prologue code.
	; (What we push does not matter. It should be some random scratch register.)			; (What we push does not matter. It should be some random scratch register.)
	; CHECK: pushq			; CHECK: pushq
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; After the prologue is set.			; After the prologue is set.
	; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]			; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
	; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]			; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
	; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]			; CHECK-NEXT: jl [[SUCCESS_LABEL:LBB[0-9_]+]]
				; CHECK: popq
				; CHECK-NEXT: retq
	;			;
	; Store %a in the alloca.			; Store %a in the alloca.
	; CHECK: movl [[ARG0CPY]], 4(%rsp)			; CHECK: movl [[ARG0CPY]], 4(%rsp)
	; Set the alloca address in the second argument.			; Set the alloca address in the second argument.
	; CHECK-NEXT: leaq 4(%rsp), %rsi			; CHECK-NEXT: leaq 4(%rsp), %rsi
	; Set the first argument to zero.			; Set the first argument to zero.
	; CHECK-NEXT: xorl %edi, %edi			; CHECK-NEXT: xorl %edi, %edi
	; CHECK-NEXT: callq _doSomething			; CHECK-NEXT: callq _doSomething
	;
	; CHECK: [[EXIT_LABEL]]:
	;
	; Without shrink-wrapping, epilogue is in the exit block.
	; Epilogue code. (What we pop does not matter.)
	; CHECK-NEXT: popq			; CHECK-NEXT: popq
	;
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
				;
	define i32 @framelessUnwind(i32 %a, i32 %b) #0 {			define i32 @framelessUnwind(i32 %a, i32 %b) #0 {
	%tmp = alloca i32, align 4			%tmp = alloca i32, align 4
	%tmp2 = icmp slt i32 %a, %b			%tmp2 = icmp slt i32 %a, %b
	br i1 %tmp2, label %true, label %false			br i1 %tmp2, label %true, label %false

	true:			true:
	store i32 %a, i32* %tmp, align 4			store i32 %a, i32* %tmp, align 4
	%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)			%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
	Show All 13 Lines
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; No prologue needed.			; No prologue needed.
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; After the prologue is set.			; After the prologue is set.
	; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]			; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
	; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]			; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
	; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]			; CHECK-NEXT: jl [[SUCCESS_LABEL:LBB[0-9_]+]]
				; CHECK: retq
	;			;
	; Prologue code.			; Prologue code.
				; CHECK-NEXT: [[SUCCESS_LABEL]]
	; CHECK: pushq %rbp			; CHECK: pushq %rbp
	; CHECK: movq %rsp, %rbp			; CHECK: movq %rsp, %rbp
	;			;
	; Store %a in the alloca.			; Store %a in the alloca.
	; CHECK: movl [[ARG0CPY]], -4(%rbp)			; CHECK: movl [[ARG0CPY]], -4(%rbp)
	; Set the alloca address in the second argument.			; Set the alloca address in the second argument.
	; CHECK-NEXT: leaq -4(%rbp), %rsi			; CHECK-NEXT: leaq -4(%rbp), %rsi
	; Set the first argument to zero.			; Set the first argument to zero.
	; CHECK-NEXT: xorl %edi, %edi			; CHECK-NEXT: xorl %edi, %edi
	; CHECK-NEXT: callq _doSomething			; CHECK-NEXT: callq _doSomething
	;			;
	; Epilogue code. (What we pop does not matter.)			; Epilogue code. (What we pop does not matter.)
	; CHECK: popq %rbp			; CHECK: popq %rbp
	;
	; CHECK: [[EXIT_LABEL]]:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
				;
	define i32 @frameUnwind(i32 %a, i32 %b) #1 {			define i32 @frameUnwind(i32 %a, i32 %b) #1 {
	%tmp = alloca i32, align 4			%tmp = alloca i32, align 4
	%tmp2 = icmp slt i32 %a, %b			%tmp2 = icmp slt i32 %a, %b
	br i1 %tmp2, label %true, label %false			br i1 %tmp2, label %true, label %false

	true:			true:
	store i32 %a, i32* %tmp, align 4			store i32 %a, i32* %tmp, align 4
	%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)			%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
	Show All 11 Lines
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; No prologue needed.			; No prologue needed.
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; After the prologue is set.			; After the prologue is set.
	; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]			; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
	; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]			; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
	; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]			; CHECK-NEXT: jl [[SUCCESS_LABEL:LBB[0-9_]+]]
				; CHECK: retq
	;			;
	; Prologue code.			; Prologue code.
	; (What we push does not matter. It should be some random scratch register.)			; (What we push does not matter. It should be some random scratch register.)
				; CHECK-NEXT: [[SUCCESS_LABEL]]
	; CHECK: pushq			; CHECK: pushq
	;			;
	; Store %a in the alloca.			; Store %a in the alloca.
	; CHECK: movl [[ARG0CPY]], 4(%rsp)			; CHECK: movl [[ARG0CPY]], 4(%rsp)
	; Set the alloca address in the second argument.			; Set the alloca address in the second argument.
	; CHECK-NEXT: leaq 4(%rsp), %rsi			; CHECK-NEXT: leaq 4(%rsp), %rsi
	; Set the first argument to zero.			; Set the first argument to zero.
	; CHECK-NEXT: xorl %edi, %edi			; CHECK-NEXT: xorl %edi, %edi
	; CHECK-NEXT: callq _doSomething			; CHECK-NEXT: callq _doSomething
	;			;
	; Epilogue code.			; Epilogue code.
	; CHECK-NEXT: addq			; CHECK-NEXT: addq
	;
	; CHECK: [[EXIT_LABEL]]:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	define i32 @framelessnoUnwind(i32 %a, i32 %b) #2 {			define i32 @framelessnoUnwind(i32 %a, i32 %b) #2 {
	%tmp = alloca i32, align 4			%tmp = alloca i32, align 4
	%tmp2 = icmp slt i32 %a, %b			%tmp2 = icmp slt i32 %a, %b
	br i1 %tmp2, label %true, label %false			br i1 %tmp2, label %true, label %false

	true:			true:
	store i32 %a, i32* %tmp, align 4			store i32 %a, i32* %tmp, align 4
	▲ Show 20 Lines • Show All 80 Lines • Show Last 20 Lines

test/CodeGen/X86/x86-shrink-wrapping.ll

	Show All 12 Lines

	; Initial motivating example: Simple diamond with a call just on one side.			; Initial motivating example: Simple diamond with a call just on one side.
	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; No prologue needed.			; No prologue needed.
	; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]			; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
	; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]			; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]
	; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]			; ENABLE-NEXT: jl [[SUCCESS_LABEL:LBB[0-9_]+]]
				; ENABLE: retq
	;			;
	; Prologue code.			; Prologue code.
	; (What we push does not matter. It should be some random scratch register.)			; (What we push does not matter. It should be some random scratch register.)
				; ENABLE: [[SUCCESS_LABEL]]:
	; CHECK: pushq			; CHECK: pushq
	;			;
	; Compare the arguments and jump to exit.			; Compare the arguments and jump to exit.
	; After the prologue is set.			; After the prologue is set.
	; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]			; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
	; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]			; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]
	; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]			; DISABLE-NEXT: jl [[SUCCESS_LABEL:LBB[0-9_]+]]
	;			;
				; DISABLE: popq
				; DISABLE-NEXT: retq

				; DISABLE: [[SUCCESS_LABEL]]:
	; Store %a in the alloca.			; Store %a in the alloca.
	; CHECK: movl [[ARG0CPY]], 4(%rsp)			; CHECK: movl [[ARG0CPY]], 4(%rsp)
	; Set the alloca address in the second argument.			; Set the alloca address in the second argument.
	; CHECK-NEXT: leaq 4(%rsp), %rsi			; CHECK-NEXT: leaq 4(%rsp), %rsi
	; Set the first argument to zero.			; Set the first argument to zero.
	; CHECK-NEXT: xorl %edi, %edi			; CHECK-NEXT: xorl %edi, %edi
	; CHECK-NEXT: callq _doSomething			; CHECK-NEXT: callq _doSomething
	;
	; With shrink-wrapping, epilogue is just after the call.			; With shrink-wrapping, epilogue is just after the call.
	; ENABLE-NEXT: addq $8, %rsp			; ENABLE-NEXT: addq $8, %rsp
	;
	; CHECK: [[EXIT_LABEL]]:
	;
	; Without shrink-wrapping, epilogue is in the exit block.
	; Epilogue code. (What we pop does not matter.)
	; DISABLE-NEXT: popq			; DISABLE-NEXT: popq
	;
	; CHECK-NEXT: retq			; CHECK-NEXT: retq

	define i32 @foo(i32 %a, i32 %b) {			define i32 @foo(i32 %a, i32 %b) {
	%tmp = alloca i32, align 4			%tmp = alloca i32, align 4
	%tmp2 = icmp slt i32 %a, %b			%tmp2 = icmp slt i32 %a, %b
	br i1 %tmp2, label %true, label %false			br i1 %tmp2, label %true, label %false

	true:			true:
	store i32 %a, i32* %tmp, align 4			store i32 %a, i32* %tmp, align 4
	%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)			%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
	▲ Show 20 Lines • Show All 926 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Codegen: Make chains from lattice-shaped CFGs (Abandoned, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 67760

include/llvm/Analysis/LoopInfoImpl.h

lib/CodeGen/MachineBlockPlacement.cpp

test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll

test/CodeGen/AArch64/arm64-atomic.ll

test/CodeGen/AArch64/arm64-ccmp.ll

test/CodeGen/AArch64/arm64-shrink-wrapping.ll

test/CodeGen/AArch64/branch-relax-bcc.ll

test/CodeGen/AArch64/combine-comparisons-by-cse.ll

test/CodeGen/AArch64/fcmp.ll

test/CodeGen/AArch64/rm_redundant_cmp.ll

test/CodeGen/AArch64/tbz-tbnz.ll

test/CodeGen/AMDGPU/cf-loop-on-constant.ll

test/CodeGen/AMDGPU/convergent-inlineasm.ll

test/CodeGen/AMDGPU/salu-to-valu.ll

test/CodeGen/AMDGPU/si-annotate-cf.ll

test/CodeGen/AMDGPU/skip-if-dead.ll

test/CodeGen/AMDGPU/smrd-vccz-bug.ll

test/CodeGen/AMDGPU/uniform-cfg.ll

test/CodeGen/AMDGPU/uniform-crash.ll

test/CodeGen/AMDGPU/valu-i1.ll

test/CodeGen/ARM/2013-05-05-IfConvertBug.ll

test/CodeGen/ARM/arm-shrink-wrapping.ll

test/CodeGen/ARM/atomic-cmpxchg.ll

test/CodeGen/ARM/atomic-op.ll

test/CodeGen/ARM/atomic-ops-v8.ll

test/CodeGen/ARM/fold-stack-adjust.ll

test/CodeGen/ARM/machine-cse-cmp.ll

test/CodeGen/Mips/llvm-ir/ashr.ll

test/CodeGen/Mips/llvm-ir/lshr.ll

test/CodeGen/Mips/llvm-ir/shl.ll

test/CodeGen/Mips/longbranch.ll

test/CodeGen/PowerPC/bdzlr.ll

test/CodeGen/PowerPC/tail-dup-layout.ll

test/CodeGen/SPARC/sjlj.ll

test/CodeGen/SystemZ/tdc-06.ll

test/CodeGen/Thumb/thumb-shrink-wrapping.ll

test/CodeGen/Thumb2/cbnz.ll

test/CodeGen/Thumb2/ifcvt-compare.ll

test/CodeGen/WebAssembly/mem-intrinsics.ll

test/CodeGen/X86/2012-08-17-legalizer-crash.ll

test/CodeGen/X86/avx-splat.ll

test/CodeGen/X86/avx512-cmp.ll

test/CodeGen/X86/block-placement.ll

test/CodeGen/X86/cmovcmov.ll

test/CodeGen/X86/critical-edge-split-2.ll

test/CodeGen/X86/shrink-wrap-chkstk.ll

test/CodeGen/X86/twoaddr-coalesce-3.ll

test/CodeGen/X86/x86-shrink-wrap-unwind.ll

test/CodeGen/X86/x86-shrink-wrapping.ll

Codegen: Make chains from lattice-shaped CFGs
AbandonedPublic