This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/ARM/
-
Target/
-
ARM/
2
ARMLowOverheadLoops.cpp
-
test/CodeGen/Thumb2/LowOverheadLoops/
-
CodeGen/
-
Thumb2/
-
LowOverheadLoops/
-
dont-ignore-vctp.mir
-
it-block-chain.mir
-
it-block-itercount.mir
-
it-block-random.mir

Differential D73947

[ARM][MVE] LowOverheadLoops: DCE on the iteration count setup expression
ClosedPublic

Authored by SjoerdMeijer on Feb 4 2020, 1:42 AM.

Download Raw Diff

Details

Reviewers

samparker
dmgreen

Commits

rG01022af5d5a1: [ARM][MVE] LowOverheadLoops: DCE on the iteration count setup expression

Summary

Once we have created a tail-predicated hardware-loop, and thus know the number of elements that are processed, we want to clean up the iteration count expression of that loop. In D73682, we bailed out of the analysis on conditionally executed instructions. This adds support for IT-blocks, so that we can handle these cases again. The restriction is that we only support IT blocks containing 1 statement, but that seems to cover most cases and forms of the iteration count expression.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

SjoerdMeijer created this revision.Feb 4 2020, 1:42 AM

Herald added a project: Restricted Project. · View Herald TranscriptFeb 4 2020, 1:42 AM

Herald added subscribers: hiraditya, kristof.beyls. · View Herald Transcript

Would you mind adding a couple of other tests, both for IT blocks with multiple instructions:

insert another instruction in the chain before LSL, so we have a larger predicated block.
with an instruction not in the iteration count use-def chain, but still in the same IT, something like: IT, LSL, SOME_RANDOM_INST

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
850	Could you remove these now, they've been laying around dead for a while.
890	just returning is fine.

Thanks for looking and the suggestion. Added test case it-block-random.mir showed that I was missing a case: an instruction in the IT block not connected to the use-def chain, so fixed that.

LGTM. Cheers!

This revision is now accepted and ready to land.Feb 5 2020, 7:06 AM

Closed by commit rG01022af5d5a1: [ARM][MVE] LowOverheadLoops: DCE on the iteration count setup expression (authored by SjoerdMeijer). · Explain WhyFeb 5 2020, 7:21 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

lib/

Target/

ARM/

ARMLowOverheadLoops.cpp

117 lines

test/

CodeGen/

Thumb2/

LowOverheadLoops/

dont-ignore-vctp.mir

6 lines

	it-block-chain.mir
	dont-ignore-vctp.mir

24 lines

	it-block-itercount.mir
	dont-ignore-vctp.mir

17 lines

	it-block-random.mir
	dont-ignore-vctp.mir

20 lines

Diff 242609

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

Show First 20 Lines • Show All 300 Lines • ▼ Show 20 Lines	private:
void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;		void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;

void ConvertVPTBlocks(LowOverheadLoop &LoLoop);		void ConvertVPTBlocks(LowOverheadLoop &LoLoop);

MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);		MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);

void Expand(LowOverheadLoop &LoLoop);		void Expand(LowOverheadLoop &LoLoop);

		void IterationCountDCE(LowOverheadLoop &LoLoop);
};		};
}		}

char ARMLowOverheadLoops::ID = 0;		char ARMLowOverheadLoops::ID = 0;

INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,		INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
false, false)		false, false)

▲ Show 20 Lines • Show All 496 Lines • ▼ Show 20 Lines	void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
MachineInstrBuilder MIB =		MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));		BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(1)); // branch target		MIB.add(MI->getOperand(1)); // branch target
MIB.addImm(ARMCC::NE); // condition code		MIB.addImm(ARMCC::NE); // condition code
MIB.addReg(ARM::CPSR);		MIB.addReg(ARM::CPSR);
MI->eraseFromParent();		MI->eraseFromParent();
}		}

MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {		// Perform dead code elimination on the loop iteration count setup expression.
LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");		// If we are tail-predicating, the number of elements to be processed is the
// When using tail-predication, try to delete the dead code that was used to		// operand of the VCTP instruction in the vector body, see getCount(), which is
// calculate the number of loop iterations.		// register $r3 in this example:
if (LoLoop.IsTailPredicationLegal()) {		//
SmallVector<MachineInstr*, 4> Killed;		// $lr = big-itercount-expression
SmallVector<MachineInstr*, 4> Dead;		// ..
		// t2DoLoopStart renamable $lr
		// vector.body:
		// ..
		// $vpr = MVE_VCTP32 renamable $r3
		// renamable $lr = t2LoopDec killed renamable $lr, 1
		// t2LoopEnd renamable $lr, %vector.body
		// tB %end
		//
		// What we would like to achieve here is to replace the do-loop start pseudo
		// instruction t2DoLoopStart with:
		//
		// $lr = MVE_DLSTP_32 killed renamable $r3
		//
		// Thus, $r3 which defines the number of elements, is written to $lr,
		// and then we want to delete the whole chain that used to define $lr,
		// see the comment below for what this chain could look like.
		//
		void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
		if (!LoLoop.IsTailPredicationLegal())
		return;

if (auto *Def = RDA->getReachingMIDef(LoLoop.Start,		if (auto *Def = RDA->getReachingMIDef(LoLoop.Start,
		samparkerUnsubmitted Not Done Reply Inline Actions Could you remove these now, they've been laying around dead for a while. samparker: Could you remove these now, they've been laying around dead for a while.
LoLoop.Start->getOperand(0).getReg())) {		LoLoop.Start->getOperand(0).getReg())) {
SmallPtrSet<MachineInstr*, 4> Remove;		SmallPtrSet<MachineInstr*, 4> Remove;
SmallPtrSet<MachineInstr*, 4> Ignore = { LoLoop.Start, LoLoop.Dec,		SmallPtrSet<MachineInstr*, 4> Ignore = { LoLoop.Start, LoLoop.Dec,
LoLoop.End, LoLoop.InsertPt };		LoLoop.End, LoLoop.InsertPt };
SmallVector<MachineInstr*, 4> Chain = { Def };		SmallVector<MachineInstr*, 4> Chain = { Def };
while (!Chain.empty()) {		while (!Chain.empty()) {
MachineInstr *MI = Chain.back();		MachineInstr *MI = Chain.back();
Chain.pop_back();		Chain.pop_back();
if (TII->getPredicate(*MI) != ARMCC::AL)
continue;		// If an instruction is conditionally executed, we assume here that this
		// is an IT-block with just this single instruction in it, otherwise we
		// continue and can't perform dead-code elimination on it. This will
		// capture most cases, because the loop iteration count expression
		// that performs a round-up to next multiple of the vector length will
		// look like this:
		//
		// %mul = ..
		// %0 = add i32 %mul, 3
		// %1 = icmp slt i32 %mul, 4
		// %smin = select i1 %1, i32 %mul, i32 4
		// %2 = sub i32 %0, %smin
		// %3 = lshr i32 %2, 2
		// %4 = add nuw nsw i32 %3, 1
		//
		// There can be a select instruction, checking if we need to execute only
		// 1 vector iteration (in this example that means 4 elements). Thus,
		// we conditionally execute one instruction to materialise the iteration
		// count.
		MachineInstr *IT = nullptr;
		if (TII->getPredicate(*MI) != ARMCC::AL) {
		auto PrevMI = std::prev(MI->getIterator());
		auto NextMI = std::next(MI->getIterator());

		if (PrevMI->getOpcode() == ARM::t2IT &&
		TII->getPredicate(*NextMI) == ARMCC::AL)
		IT = &*PrevMI;
		else
		// We can't analyse IT-blocks with multiple statements. Be
		// conservative here: clear the list, and don't remove any statements
		// at all.
		samparkerUnsubmitted Not Done Reply Inline Actions just returning is fine. samparker: just returning is fine.
		return;
		}

if (RDA->isSafeToRemove(MI, Remove, Ignore)) {		if (RDA->isSafeToRemove(MI, Remove, Ignore)) {
for (auto &MO : MI->operands()) {		for (auto &MO : MI->operands()) {
if (!MO.isReg() \|\| !MO.isUse() \|\| MO.getReg() == 0)		if (!MO.isReg() \|\| !MO.isUse() \|\| MO.getReg() == 0)
continue;		continue;
if (auto *Op = RDA->getReachingMIDef(MI, MO.getReg()))		if (auto *Op = RDA->getReachingMIDef(MI, MO.getReg()))
Chain.push_back(Op);		Chain.push_back(Op);
}		}
Ignore.insert(MI);		Ignore.insert(MI);

		if (IT)
		Remove.insert(IT);
}		}
}		}
LoLoop.ToRemove.insert(Remove.begin(), Remove.end());		LoLoop.ToRemove.insert(Remove.begin(), Remove.end());
}		}
}		}

		MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
		LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");
		// When using tail-predication, try to delete the dead code that was used to
		// calculate the number of loop iterations.
		IterationCountDCE(LoLoop);

MachineInstr *InsertPt = LoLoop.InsertPt;		MachineInstr *InsertPt = LoLoop.InsertPt;
MachineInstr *Start = LoLoop.Start;		MachineInstr *Start = LoLoop.Start;
MachineBasicBlock *MBB = InsertPt->getParent();		MachineBasicBlock *MBB = InsertPt->getParent();
bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;		bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
unsigned Opc = LoLoop.getStartOpcode();		unsigned Opc = LoLoop.getStartOpcode();
MachineOperand &Count = LoLoop.getCount();		MachineOperand &Count = LoLoop.getCount();

MachineInstrBuilder MIB =		MachineInstrBuilder MIB =
▲ Show 20 Lines • Show All 204 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir

This file was copied to llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir, llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir, llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir.

Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	body: \|
; CHECK-LABEL: name: dont_ignore_vctp		; CHECK-LABEL: name: dont_ignore_vctp
; CHECK: bb.0.entry:		; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)		; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r7		; CHECK: liveins: $lr, $r0, $r1, $r2, $r7
; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp		; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8		; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4		; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8		; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg		; CHECK: renamable $r3, dead $cpsr = tLSLri killed renamable $r2, 1, 14, $noreg
; CHECK: renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
; CHECK: tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: dead $r12 = t2LSLri killed renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate
; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg		; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg
; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)		; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3		; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: bb.1.do.body (align 4):		; CHECK: bb.1.do.body (align 4):
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)		; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1		; CHECK: liveins: $lr, $q0, $r0, $r1
; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg		; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1		; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir

This file was copied from llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir.

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py		# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs \| FileCheck %s		# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs \| FileCheck %s

		# IT-block with 3 statements, all chained together.

--- \|		--- \|
define hidden arm_aapcs_vfpcc void @dont_ignore_vctp(float* %pSrc, float* %pDst, i32 %blockSize) local_unnamed_addr #0 {		define hidden arm_aapcs_vfpcc void @it_block_2_stmts(float* %pSrc, float* %pDst, i32 %blockSize) local_unnamed_addr #0 {
entry:		entry:
%mul = shl i32 %blockSize, 1		%mul = shl i32 %blockSize, 1
%0 = add i32 %mul, 3		%0 = add i32 %mul, 3
%1 = icmp slt i32 %mul, 4		%1 = icmp slt i32 %mul, 4
%smin = select i1 %1, i32 %mul, i32 4		%smin = select i1 %1, i32 %mul, i32 4
%2 = sub i32 %0, %smin		%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2		%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1		%4 = add nuw nsw i32 %3, 1
Show All 24 Lines	--- \|
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1		declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)		declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)		declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)		declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)		declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)

...		...
---		---
name: dont_ignore_vctp		name: it_block_2_stmts
alignment: 16		alignment: 16
exposesReturnsTwice: false		exposesReturnsTwice: false
legalized: false		legalized: false
regBankSelected: false		regBankSelected: false
selected: false		selected: false
failedISel: false		failedISel: false
tracksRegLiveness: true		tracksRegLiveness: true
hasWinCFI: false		hasWinCFI: false
Show All 32 Lines
callSites: []		callSites: []
constants:		constants:
- id: 0		- id: 0
value: '<4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00>'		value: '<4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00>'
alignment: 16		alignment: 16
isTargetSpecific: false		isTargetSpecific: false
machineFunctionInfo: {}		machineFunctionInfo: {}
body: \|		body: \|
; CHECK-LABEL: name: dont_ignore_vctp		; CHECK-LABEL: name: it_block_2_stmts
; CHECK: bb.0.entry:		; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)		; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r7		; CHECK: liveins: $lr, $r0, $r2, $r7
; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp		; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8		; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4		; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8		; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg		; CHECK: renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg
; CHECK: renamable $r12 = t2MOVi 4, 14, $noreg, $noreg		; CHECK: renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
; CHECK: tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr		; CHECK: tCMPi8 killed renamable $r3, 4, 14, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate		; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: dead $r12 = t2LSLri killed renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate		; CHECK: $r1 = t2ADDri renamable $r0, 3, 11, $noreg, $noreg, implicit $itstate
		; CHECK: $r3 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit renamable $r12, implicit $itstate
		; CHECK: $r12 = t2LSLri renamable $r3, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate
		; CHECK: renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
		; CHECK: renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
		; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
		; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg		; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg
; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)		; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3		; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: bb.1.do.body (align 4):		; CHECK: bb.1.do.body (align 4):
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)		; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1		; CHECK: liveins: $lr, $q0, $r0, $r1
; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg		; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1		; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1
Show All 12 Lines	bb.0.entry:
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp		frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8		frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4		frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8		frame-setup CFI_INSTRUCTION offset $r7, -8
renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg		renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg
renamable $r12 = t2MOVi 4, 14, $noreg, $noreg		renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr		tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate		t2IT 11, 8, implicit-def $itstate
$r12 = t2LSLri renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate		$r1 = t2ADDri killed renamable $r0, 3, 11, $noreg, $noreg, implicit $itstate
		$r3 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit renamable $r12, implicit $itstate
		$r12 = t2LSLri renamable $r3, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate
renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg		renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg		renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg		renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg		renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r2 = tLEApcrel %const.0, 14, $noreg		renamable $r2 = tLEApcrel %const.0, 14, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)		renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
t2DoLoopStart renamable $lr		t2DoLoopStart renamable $lr

Show All 23 Lines

llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir

This file was copied from llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir.

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py		# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs \| FileCheck %s		# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs \| FileCheck %s

		# IT-block with 2 statements, which we don't support yet, so check that we do
		# not remove any of the iteration count statements.

--- \|		--- \|
define hidden arm_aapcs_vfpcc void @dont_ignore_vctp(float* %pSrc, float* %pDst, i32 %blockSize) local_unnamed_addr #0 {		define hidden arm_aapcs_vfpcc void @it_block_2_stmts(float* %pSrc, float* %pDst, i32 %blockSize) local_unnamed_addr #0 {
entry:		entry:
%mul = shl i32 %blockSize, 1		%mul = shl i32 %blockSize, 1
%0 = add i32 %mul, 3		%0 = add i32 %mul, 3
%1 = icmp slt i32 %mul, 4		%1 = icmp slt i32 %mul, 4
%smin = select i1 %1, i32 %mul, i32 4		%smin = select i1 %1, i32 %mul, i32 4
%2 = sub i32 %0, %smin		%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2		%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1		%4 = add nuw nsw i32 %3, 1
Show All 24 Lines	--- \|
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1		declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)		declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)		declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)		declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)		declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)

...		...
---		---
name: dont_ignore_vctp		name: it_block_2_stmts
alignment: 16		alignment: 16
exposesReturnsTwice: false		exposesReturnsTwice: false
legalized: false		legalized: false
regBankSelected: false		regBankSelected: false
selected: false		selected: false
failedISel: false		failedISel: false
tracksRegLiveness: true		tracksRegLiveness: true
hasWinCFI: false		hasWinCFI: false
Show All 32 Lines
callSites: []		callSites: []
constants:		constants:
- id: 0		- id: 0
value: '<4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00>'		value: '<4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00>'
alignment: 16		alignment: 16
isTargetSpecific: false		isTargetSpecific: false
machineFunctionInfo: {}		machineFunctionInfo: {}
body: \|		body: \|
; CHECK-LABEL: name: dont_ignore_vctp		; CHECK-LABEL: name: it_block_2_stmts
; CHECK: bb.0.entry:		; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)		; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r7		; CHECK: liveins: $lr, $r0, $r1, $r2, $r7
; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp		; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8		; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4		; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8		; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg		; CHECK: renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg
; CHECK: renamable $r12 = t2MOVi 4, 14, $noreg, $noreg		; CHECK: renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
; CHECK: tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr		; CHECK: tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate		; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: dead $r12 = t2LSLri killed renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate		; CHECK: $r12 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit killed renamable $r12, implicit $itstate
		; CHECK: $r12 = t2LSLri renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate
		; CHECK: renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
		; CHECK: renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
		; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
		; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg		; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg
; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)		; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3		; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: bb.1.do.body (align 4):		; CHECK: bb.1.do.body (align 4):
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)		; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1		; CHECK: liveins: $lr, $q0, $r0, $r1
; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg		; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1		; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1
Show All 12 Lines	bb.0.entry:
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp		frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8		frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4		frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8		frame-setup CFI_INSTRUCTION offset $r7, -8
renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg		renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg
renamable $r12 = t2MOVi 4, 14, $noreg, $noreg		renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr		tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate		t2IT 11, 8, implicit-def $itstate
		$r12 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit renamable $r12, implicit $itstate
$r12 = t2LSLri renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate		$r12 = t2LSLri renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate
renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg		renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg		renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg		renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg		renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r2 = tLEApcrel %const.0, 14, $noreg		renamable $r2 = tLEApcrel %const.0, 14, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)		renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
t2DoLoopStart renamable $lr		t2DoLoopStart renamable $lr
Show All 24 Lines

llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir

This file was copied from llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir.

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py		# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs \| FileCheck %s		# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs \| FileCheck %s

		# IT-block with 2 statements, with the last instruction not connected to the
		# use-def chain of the iteration counter; make sure we don't remove the
		# IT block and any of its instructions.

--- \|		--- \|
define hidden arm_aapcs_vfpcc void @dont_ignore_vctp(float* %pSrc, float* %pDst, i32 %blockSize) local_unnamed_addr #0 {		define hidden arm_aapcs_vfpcc void @it_block_2_stmts(float* %pSrc, float* %pDst, i32 %blockSize) local_unnamed_addr #0 {
entry:		entry:
%mul = shl i32 %blockSize, 1		%mul = shl i32 %blockSize, 1
%0 = add i32 %mul, 3		%0 = add i32 %mul, 3
%1 = icmp slt i32 %mul, 4		%1 = icmp slt i32 %mul, 4
%smin = select i1 %1, i32 %mul, i32 4		%smin = select i1 %1, i32 %mul, i32 4
%2 = sub i32 %0, %smin		%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2		%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1		%4 = add nuw nsw i32 %3, 1
Show All 24 Lines	--- \|
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1		declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)		declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)		declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)		declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)		declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)

...		...
---		---
name: dont_ignore_vctp		name: it_block_2_stmts
alignment: 16		alignment: 16
exposesReturnsTwice: false		exposesReturnsTwice: false
legalized: false		legalized: false
regBankSelected: false		regBankSelected: false
selected: false		selected: false
failedISel: false		failedISel: false
tracksRegLiveness: true		tracksRegLiveness: true
hasWinCFI: false		hasWinCFI: false
Show All 32 Lines
callSites: []		callSites: []
constants:		constants:
- id: 0		- id: 0
value: '<4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00>'		value: '<4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00>'
alignment: 16		alignment: 16
isTargetSpecific: false		isTargetSpecific: false
machineFunctionInfo: {}		machineFunctionInfo: {}
body: \|		body: \|
; CHECK-LABEL: name: dont_ignore_vctp		; CHECK-LABEL: name: it_block_2_stmts
; CHECK: bb.0.entry:		; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)		; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r7		; CHECK: liveins: $lr, $r0, $r1, $r2, $r7
; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp		; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8		; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4		; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8		; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg		; CHECK: renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg
; CHECK: renamable $r12 = t2MOVi 4, 14, $noreg, $noreg		; CHECK: renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
; CHECK: tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr		; CHECK: tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate		; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: dead $r12 = t2LSLri killed renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate		; CHECK: $r12 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit killed renamable $r12, implicit $itstate
		; CHECK: $r0 = t2ADDri killed renamable $r0, 42, 11, killed $cpsr, $noreg, implicit killed renamable $r0, implicit killed $itstate
		; CHECK: renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
		; CHECK: renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
		; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
		; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg		; CHECK: renamable $r2 = tLEApcrel %const.0, 14, $noreg
; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)		; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3		; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: bb.1.do.body (align 4):		; CHECK: bb.1.do.body (align 4):
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)		; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1		; CHECK: liveins: $lr, $q0, $r0, $r1
; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg		; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1		; CHECK: renamable $q1 = nnan ninf nsz MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1
Show All 12 Lines	bb.0.entry:
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp		frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8		frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4		frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8		frame-setup CFI_INSTRUCTION offset $r7, -8
renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg		renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg
renamable $r12 = t2MOVi 4, 14, $noreg, $noreg		renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr		tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate		t2IT 11, 8, implicit-def $itstate
$r12 = t2LSLri renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate		$r12 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit renamable $r12, implicit $itstate
		$r0 = t2ADDri renamable $r0, 42, 11, killed $cpsr, $noreg, implicit killed renamable $r0, implicit killed $itstate
renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg		renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg		renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg		renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg		renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r2 = tLEApcrel %const.0, 14, $noreg		renamable $r2 = tLEApcrel %const.0, 14, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)		renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
t2DoLoopStart renamable $lr		t2DoLoopStart renamable $lr

Show All 23 Lines