Diff 440774

llvm/include/llvm/CodeGen/MachineCombinerPattern.h

Show All 28 Lines	enum class MachineCombinerPattern {
REASSOC_XY_AMM_BMM,		REASSOC_XY_AMM_BMM,
REASSOC_XMM_AMM_BMM,		REASSOC_XMM_AMM_BMM,

// These are patterns matched by the PowerPC to reassociate FMA and FSUB to		// These are patterns matched by the PowerPC to reassociate FMA and FSUB to
// reduce register pressure.		// reduce register pressure.
REASSOC_XY_BCA,		REASSOC_XY_BCA,
REASSOC_XY_BAC,		REASSOC_XY_BAC,

		// These are patterns used to reduce the length of dependence chain.
		SUBADD_OP1,
		SUBADD_OP2,

// These are multiply-add patterns matched by the AArch64 machine combiner.		// These are multiply-add patterns matched by the AArch64 machine combiner.
MULADDW_OP1,		MULADDW_OP1,
MULADDW_OP2,		MULADDW_OP2,
MULSUBW_OP1,		MULSUBW_OP1,
MULSUBW_OP2,		MULSUBW_OP2,
MULADDWI_OP1,		MULADDWI_OP1,
MULSUBWI_OP1,		MULSUBWI_OP1,
MULADDX_OP1,		MULADDX_OP1,
▲ Show 20 Lines • Show All 128 Lines • Show Last 20 Lines

llvm/lib/CodeGen/MachineCombiner.cpp

Show First 20 Lines • Show All 271 Lines • ▼ Show 20 Lines	static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
// MachineCombinerPattern class.		// MachineCombinerPattern class.
switch (P) {		switch (P) {
case MachineCombinerPattern::REASSOC_AX_BY:		case MachineCombinerPattern::REASSOC_AX_BY:
case MachineCombinerPattern::REASSOC_AX_YB:		case MachineCombinerPattern::REASSOC_AX_YB:
case MachineCombinerPattern::REASSOC_XA_BY:		case MachineCombinerPattern::REASSOC_XA_BY:
case MachineCombinerPattern::REASSOC_XA_YB:		case MachineCombinerPattern::REASSOC_XA_YB:
case MachineCombinerPattern::REASSOC_XY_AMM_BMM:		case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:		case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
		case MachineCombinerPattern::SUBADD_OP1:
		case MachineCombinerPattern::SUBADD_OP2:
return CombinerObjective::MustReduceDepth;		return CombinerObjective::MustReduceDepth;
case MachineCombinerPattern::REASSOC_XY_BCA:		case MachineCombinerPattern::REASSOC_XY_BCA:
case MachineCombinerPattern::REASSOC_XY_BAC:		case MachineCombinerPattern::REASSOC_XY_BAC:
return CombinerObjective::MustReduceRegisterPressure;		return CombinerObjective::MustReduceRegisterPressure;
default:		default:
return CombinerObjective::Default;		return CombinerObjective::Default;
}		}
}		}
▲ Show 20 Lines • Show All 451 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,860 Lines • ▼ Show 20 Lines	if (CheckZeroReg) {
assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&		assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&		MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");		MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
// The third input reg must be zero.		// The third input reg must be zero.
if (MI->getOperand(3).getReg() != ZeroReg)		if (MI->getOperand(3).getReg() != ZeroReg)
return false;		return false;
}		}

		if (isCombineInstrSettingFlag(CombineOpc) &&
		MI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
		return false;

return true;		return true;
}		}

//		//
// Is \param MO defined by an integer multiply and can be combined?		// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,		static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
unsigned MulOpc, unsigned ZeroReg) {		unsigned MulOpc, unsigned ZeroReg) {
return canCombine(MBB, MO, MulOpc, ZeroReg, true);		return canCombine(MBB, MO, MulOpc, ZeroReg, true);
▲ Show 20 Lines • Show All 484 Lines • ▼ Show 20 Lines	bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:		case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:		case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:		case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:		case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
return true;		return true;
} // end switch (Pattern)		} // end switch (Pattern)
return false;		return false;
}		}

		/// Find other MI combine patterns.
		///
		/// The only shape recognised here is an integer register-register subtract
		/// whose second source operand is produced by a register-register add:
		///   A - (B + C)  ==>  (A - B) - C   or   (A - C) - B
		///
		/// \param Root      Candidate instruction at the end of the chain.
		/// \param Patterns  On a match, SUBADD_OP1 and then SUBADD_OP2 are appended.
		/// \returns true if patterns were appended, false otherwise.
		static bool getMiscPatterns(MachineInstr &Root,
		                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
		  unsigned Opc = Root.getOpcode();
		  MachineBasicBlock &MBB = *Root.getParent();

		  // Only plain and flag-setting register-register subtracts can be the root.
		  switch (Opc) {
		  case AArch64::SUBWrr:
		  case AArch64::SUBSWrr:
		  case AArch64::SUBXrr:
		  case AArch64::SUBSXrr:
		    // Found candidate root.
		    break;
		  default:
		    return false;
		  }

		  // The replacement sequence uses the non-flag-setting SUB, so a
		  // flag-setting root is only acceptable when its NZCV definition is dead.
		  if (isCombineInstrSettingFlag(Opc) &&
		      Root.findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
		    return false;

		  // Operand 2 is the value being subtracted; it must come from an ADD (or an
		  // ADDS that canCombine() accepts) of matching width.
		  MachineOperand &Subtrahend = Root.getOperand(2);
		  if (canCombine(MBB, Subtrahend, AArch64::ADDWrr) ||
		      canCombine(MBB, Subtrahend, AArch64::ADDSWrr) ||
		      canCombine(MBB, Subtrahend, AArch64::ADDXrr) ||
		      canCombine(MBB, Subtrahend, AArch64::ADDSXrr)) {
		    // Offer both operand orders; the combiner evaluates them in this order.
		    Patterns.push_back(MachineCombinerPattern::SUBADD_OP1);
		    Patterns.push_back(MachineCombinerPattern::SUBADD_OP2);
		    return true;
		  }

		  return false;
		}

/// Return true when there is potentially a faster code sequence for an		/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in		/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the		/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.		/// pattern evaluator stops checking as soon as it finds a faster sequence.
///
/// The AArch64-specific finders below are tried one after another and the
/// function returns as soon as one of them reports a match, so a later finder
/// is only consulted when every earlier one found nothing for \p Root.

bool AArch64InstrInfo::getMachineCombinerPatterns(		bool AArch64InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,		MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
bool DoRegPressureReduce) const {		bool DoRegPressureReduce) const {
// Integer patterns		// Integer patterns
if (getMaddPatterns(Root, Patterns))		if (getMaddPatterns(Root, Patterns))
return true;		return true;
// Floating point patterns		// Floating point patterns
if (getFMULPatterns(Root, Patterns))		if (getFMULPatterns(Root, Patterns))
return true;		return true;
if (getFMAPatterns(Root, Patterns))		if (getFMAPatterns(Root, Patterns))
return true;		return true;

		// Other patterns
		// (currently the A - (B + C) sub/add reassociation, tried last among the
		// target-specific finders)
		if (getMiscPatterns(Root, Patterns))
		return true;

// No AArch64-specific pattern matched: defer to the generic implementation.
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,		return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);		DoRegPressureReduce);
}		}

enum class FMAInstKind { Default, Indexed, Accumulator };		enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.		/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.		/// This function supports both integer and floating point instructions.
/// A typical example:		/// A typical example:
▲ Show 20 Lines • Show All 234 Lines • ▼ Show 20 Lines	MachineInstrBuilder MIB =
.addReg(SrcReg0, getKillRegState(Src0IsKill))		.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))		.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addReg(VR);		.addReg(VR);
// Insert the MADD		// Insert the MADD
InsInstrs.push_back(MIB);		InsInstrs.push_back(MIB);
return MUL;		return MUL;
}		}

		/// Do the following transformation
		/// A - (B + C) ==> (A - B) - C
		/// A - (B + C) ==> (A - C) - B
		///
		/// \p Root is the SUB/SUBS computing A - (B + C); the ADD defining its second
		/// source operand supplies B and C. Two non-flag-setting SUBs are built (not
		/// yet inserted) and appended to \p InsInstrs, and the ADD is appended to
		/// \p DelInstrs. \p Root itself is not recorded for deletion here.
		///
		/// \param IdxOpd1  Which ADD source operand (1 or 2) is subtracted first,
		///                 i.e. which one plays the role of B.
		/// \param InstrIdxForVirtReg  Receives the mapping from the new intermediate
		///                 virtual register to index 0, the position in \p InsInstrs
		///                 of the instruction that defines it.
		static void
		genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
		                 const TargetInstrInfo *TII, MachineInstr &Root,
		                 SmallVectorImpl<MachineInstr *> &InsInstrs,
		                 SmallVectorImpl<MachineInstr *> &DelInstrs,
		                 unsigned IdxOpd1,
		                 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
		  assert(IdxOpd1 == 1 || IdxOpd1 == 2);
		  unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
		  Register AddResultReg = Root.getOperand(2).getReg();
		  MachineInstr *AddMI = MRI.getUniqueVRegDef(AddResultReg);
		  assert(AddMI && "Subtrahend must have a unique defining instruction.");

		  Register ResultReg = Root.getOperand(0).getReg();
		  Register RegA = Root.getOperand(1).getReg();
		  bool RegAIsKill = Root.getOperand(1).isKill();
		  Register RegB = AddMI->getOperand(IdxOpd1).getReg();
		  bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
		  Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
		  bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
		  // NOTE(review): kill flags are copied from their original operands; if A
		  // aliases B or C the copied flags may need adjusting - confirm.

		  // RegA can be the physical zero register (a negate is SUB WZR/XZR, x), and
		  // MachineRegisterInfo::getRegClass() only accepts virtual registers. In
		  // that case use the class of the ADD result, whose unique virtual-register
		  // definition was looked up above.
		  const TargetRegisterClass *NewRC =
		      MRI.getRegClass(RegA.isVirtual() ? RegA : AddResultReg);
		  Register NewVR = MRI.createVirtualRegister(NewRC);

		  // The rewritten sequence never sets flags: map SUBS to the plain SUB of the
		  // same width.
		  unsigned Opcode = Root.getOpcode();
		  if (Opcode == AArch64::SUBSWrr)
		    Opcode = AArch64::SUBWrr;
		  else if (Opcode == AArch64::SUBSXrr)
		    Opcode = AArch64::SUBXrr;
		  else
		    assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
		           "Unexpected instruction opcode.");

		  // NewVR = A - B
		  MachineInstrBuilder MIB1 =
		      BuildMI(MF, Root.getDebugLoc(), TII->get(Opcode), NewVR)
		          .addReg(RegA, getKillRegState(RegAIsKill))
		          .addReg(RegB, getKillRegState(RegBIsKill));
		  // Result = NewVR - C; NewVR has no other use, so it is killed here.
		  MachineInstrBuilder MIB2 =
		      BuildMI(MF, Root.getDebugLoc(), TII->get(Opcode), ResultReg)
		          .addReg(NewVR, getKillRegState(true))
		          .addReg(RegC, getKillRegState(RegCIsKill));

		  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
		  InsInstrs.push_back(MIB1);
		  InsInstrs.push_back(MIB2);
		  DelInstrs.push_back(AddMI);
		}

/// When getMachineCombinerPatterns() finds potential patterns,		/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the		/// this function generates the instructions that could replace the
/// original code sequence		/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(		void AArch64InstrInfo::genAlternativeCodeSequence(
MachineInstr &Root, MachineCombinerPattern Pattern,		MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,		SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,		SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {		DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
MachineBasicBlock &MBB = *Root.getParent();		MachineBasicBlock &MBB = *Root.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();		MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineFunction &MF = *MBB.getParent();		MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();		const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

MachineInstr *MUL = nullptr;		MachineInstr *MUL = nullptr;
const TargetRegisterClass *RC;		const TargetRegisterClass *RC;
unsigned Opc;		unsigned Opc;
switch (Pattern) {		switch (Pattern) {
default:		default:
// Reassociate instructions.		// Reassociate instructions.
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,		TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);		DelInstrs, InstrIdxForVirtReg);
return;		return;
		case MachineCombinerPattern::SUBADD_OP1:
		// A - (B + C)
		// ==> (A - B) - C
		genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
		InstrIdxForVirtReg);
		break;
		case MachineCombinerPattern::SUBADD_OP2:
		// A - (B + C)
		// ==> (A - C) - B
		genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
		InstrIdxForVirtReg);
		break;
case MachineCombinerPattern::MULADDW_OP1:		case MachineCombinerPattern::MULADDW_OP1:
case MachineCombinerPattern::MULADDX_OP1:		case MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0		// MUL I=A,B,0
// ADD R,I,C		// ADD R,I,C
// ==> MADD R,A,B,C		// ==> MADD R,A,B,C
// --- Create(MADD);		// --- Create(MADD);
if (Pattern == MachineCombinerPattern::MULADDW_OP1) {		if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
Opc = AArch64::MADDWrrr;		Opc = AArch64::MADDWrrr;
▲ Show 20 Lines • Show All 2,385 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu %s -o - \| FileCheck %s

				; The test cases in this file check the following transformation, which
				; should be applied only when the rewritten form reduces latency:
				; A - (B + C) ==> (A - B) - C

				; 32 bit version.
				define i32 @test1(i32 %a, i32 %b, i32 %c) {
				; CHECK-LABEL: test1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: add w9, w0, #100
				; CHECK-NEXT: orr w8, w2, #0x80
				; CHECK-NEXT: sub w8, w8, w9
				; CHECK-NEXT: eor w9, w1, w9, lsl #8
				; CHECK-NEXT: sub w8, w8, w9
				; CHECK-NEXT: eor w0, w8, w9, asr #13
				; CHECK-NEXT: ret
				entry:
				%c1 = or i32 %c, 128
				%a1 = add i32 %a, 100
				%shl = shl i32 %a1, 8
				%xor = xor i32 %shl, %b
				%add = add i32 %xor, %a1
				%sub = sub i32 %c1, %add
				%shr = ashr i32 %xor, 13
				%xor2 = xor i32 %sub, %shr
				ret i32 %xor2
				}

				; 64 bit version.
				define i64 @test2(i64 %a, i64 %b, i64 %c) {
				; CHECK-LABEL: test2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: add x9, x0, #100
				; CHECK-NEXT: orr x8, x2, #0x80
				; CHECK-NEXT: sub x8, x8, x9
				; CHECK-NEXT: eor x9, x1, x9, lsl #8
				; CHECK-NEXT: sub x8, x8, x9
				; CHECK-NEXT: eor x0, x8, x9, asr #13
				; CHECK-NEXT: ret
				entry:
				%c1 = or i64 %c, 128
				%a1 = add i64 %a, 100
				%shl = shl i64 %a1, 8
				%xor = xor i64 %shl, %b
				%add = add i64 %xor, %a1
				%sub = sub i64 %c1, %add
				%shr = ashr i64 %xor, 13
				%xor2 = xor i64 %sub, %shr
				ret i64 %xor2
				}

				; Negative test: the rewritten form cannot reduce latency here, so the ADD/SUB pair must be kept.
				define i32 @test3(i32 %a, i32 %b, i32 %c) {
				; CHECK-LABEL: test3:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: add w9, w0, #100
				; CHECK-NEXT: orr w8, w2, #0x80
				; CHECK-NEXT: add w8, w8, w9
				; CHECK-NEXT: eor w9, w1, w9, lsl #8
				; CHECK-NEXT: sub w8, w9, w8
				; CHECK-NEXT: eor w0, w8, w9, asr #13
				; CHECK-NEXT: ret
				entry:
				%c1 = or i32 %c, 128
				%a1 = add i32 %a, 100
				%shl = shl i32 %a1, 8
				%xor = xor i32 %shl, %b
				%add = add i32 %c1, %a1
				%sub = sub i32 %xor, %add
				%shr = ashr i32 %xor, 13
				%xor2 = xor i32 %sub, %shr
				ret i32 %xor2
				}

llvm/test/CodeGen/AArch64/machine-combiner-subadd2.mir

This file was added.

				# RUN: llc -mtriple=aarch64-linux-gnu -run-pass machine-combiner -o - %s \| FileCheck %s

				# The test cases in this file check the following transformation, which
				# should be applied only when the rewritten form reduces latency:
				# A - (B + C) ==> (A - B) - C

				---
				# 32 bit.

				# CHECK-LABEL: name: test1
				# CHECK: %10:gpr32common = SUBWrr killed %3, %4
				# CHECK-NEXT: %7:gpr32 = SUBWrr killed %10, %5

				name: test1
				registers:
				- { id: 0, class: gpr32common }
				- { id: 1, class: gpr32 }
				- { id: 2, class: gpr32 }
				- { id: 3, class: gpr32common }
				- { id: 4, class: gpr32common }
				- { id: 5, class: gpr32 }
				- { id: 6, class: gpr32 }
				- { id: 7, class: gpr32 }
				- { id: 8, class: gpr32 }
				body: \|
				bb.0:
				%2:gpr32 = COPY $w2
				%1:gpr32 = COPY $w1
				%0:gpr32common = COPY $w0
				%3:gpr32common = ORRWri %2:gpr32, 1600
				%4:gpr32common = ADDWri %0:gpr32common, 100, 0
				%5:gpr32 = EORWrs %1:gpr32, %4:gpr32common, 8
				%6:gpr32 = ADDWrr %5:gpr32, %4:gpr32common
				%7:gpr32 = SUBWrr killed %3:gpr32common, killed %6:gpr32
				%8:gpr32 = EORWrs killed %7:gpr32, %5:gpr32, 141
				$w0 = COPY %8:gpr32
				RET_ReallyLR implicit $w0

				...
				---
				# 64 bit.

				# CHECK-LABEL: name: test2
				# CHECK: %10:gpr64common = SUBXrr killed %3, %4
				# CHECK-NEXT: %7:gpr64 = SUBXrr killed %10, %5

				name: test2
				registers:
				- { id: 0, class: gpr64common }
				- { id: 1, class: gpr64 }
				- { id: 2, class: gpr64 }
				- { id: 3, class: gpr64common }
				- { id: 4, class: gpr64common }
				- { id: 5, class: gpr64 }
				- { id: 6, class: gpr64 }
				- { id: 7, class: gpr64 }
				- { id: 8, class: gpr64 }
				body: \|
				bb.0:
				%2:gpr64 = COPY $x2
				%1:gpr64 = COPY $x1
				%0:gpr64common = COPY $x0
				%3:gpr64common = ORRXri %2:gpr64, 1600
				%4:gpr64common = ADDXri %0:gpr64common, 100, 0
				%5:gpr64 = EORXrs %1:gpr64, %4:gpr64common, 8
				%6:gpr64 = ADDXrr %5:gpr64, %4:gpr64common
				%7:gpr64 = SUBXrr killed %3:gpr64common, killed %6:gpr64
				%8:gpr64 = EORXrs killed %7:gpr64, %5:gpr64, 141
				$x0 = COPY %8:gpr64
				RET_ReallyLR implicit $x0

				...
				---
				# Negative test: the rewritten form cannot reduce latency here, so the ADD/SUB pair must be kept.

				# CHECK-LABEL: name: test3
				# CHECK: %6:gpr32 = ADDWrr killed %3, %4
				# CHECK-NEXT: %7:gpr32 = SUBWrr %5, killed %6

				name: test3
				registers:
				- { id: 0, class: gpr32common }
				- { id: 1, class: gpr32 }
				- { id: 2, class: gpr32 }
				- { id: 3, class: gpr32common }
				- { id: 4, class: gpr32common }
				- { id: 5, class: gpr32 }
				- { id: 6, class: gpr32 }
				- { id: 7, class: gpr32 }
				- { id: 8, class: gpr32 }
				body: \|
				bb.0:
				%2:gpr32 = COPY $w2
				%1:gpr32 = COPY $w1
				%0:gpr32common = COPY $w0
				%3:gpr32common = ORRWri %2:gpr32, 1600
				%4:gpr32common = ADDWri %0:gpr32common, 100, 0
				%5:gpr32 = EORWrs %1:gpr32, %4:gpr32common, 8
				%6:gpr32 = ADDWrr killed %3:gpr32common, %4:gpr32common
				%7:gpr32 = SUBWrr %5:gpr32, killed %6:gpr32
				%8:gpr32 = EORWrs killed %7:gpr32, %5:gpr32, 141
				$w0 = COPY %8:gpr32
				RET_ReallyLR implicit $w0

				...
				---
				# A dead definition of the flags register ($nzcv) should not block the transformation.

				# CHECK-LABEL: name: test4
				# CHECK: %10:gpr64common = SUBXrr killed %3, %4
				# CHECK-NEXT: %7:gpr64 = SUBXrr killed %10, %5

				name: test4
				registers:
				- { id: 0, class: gpr64common }
				- { id: 1, class: gpr64 }
				- { id: 2, class: gpr64 }
				- { id: 3, class: gpr64common }
				- { id: 4, class: gpr64common }
				- { id: 5, class: gpr64 }
				- { id: 6, class: gpr64 }
				- { id: 7, class: gpr64 }
				- { id: 8, class: gpr64 }
				body: \|
				bb.0:
				%2:gpr64 = COPY $x2
				%1:gpr64 = COPY $x1
				%0:gpr64common = COPY $x0
				%3:gpr64common = ORRXri %2:gpr64, 1600
				%4:gpr64common = ADDXri %0:gpr64common, 100, 0
				%5:gpr64 = EORXrs %1:gpr64, %4:gpr64common, 8
				%6:gpr64 = ADDSXrr %5:gpr64, %4:gpr64common, implicit-def dead $nzcv
				%7:gpr64 = SUBSXrr killed %3:gpr64common, killed %6:gpr64, implicit-def dead $nzcv
				%8:gpr64 = EORXrs killed %7:gpr64, %5:gpr64, 141
				$x0 = COPY %8:gpr64
				RET_ReallyLR implicit $x0

				...
				---
				# A live (non-dead) definition of the flags register on the SUB blocks the transformation.

				# CHECK-LABEL: name: test5
				# CHECK: %6:gpr32 = ADDWrr %5, %4
				# CHECK-NEXT: %7:gpr32 = SUBSWrr killed %3, killed %6, implicit-def $nzcv

				name: test5
				registers:
				- { id: 0, class: gpr32common }
				- { id: 1, class: gpr32 }
				- { id: 2, class: gpr32 }
				- { id: 3, class: gpr32common }
				- { id: 4, class: gpr32common }
				- { id: 5, class: gpr32 }
				- { id: 6, class: gpr32 }
				- { id: 7, class: gpr32 }
				- { id: 8, class: gpr32 }
				body: \|
				bb.0:
				%2:gpr32 = COPY $w2
				%1:gpr32 = COPY $w1
				%0:gpr32common = COPY $w0
				%3:gpr32common = ORRWri %2:gpr32, 1600
				%4:gpr32common = ADDWri %0:gpr32common, 100, 0
				%5:gpr32 = EORWrs %1:gpr32, %4:gpr32common, 8
				%6:gpr32 = ADDWrr %5:gpr32, %4:gpr32common
				%7:gpr32 = SUBSWrr killed %3:gpr32common, killed %6:gpr32, implicit-def $nzcv
				%8:gpr32 = EORWrs killed %7:gpr32, %5:gpr32, 141
				$w0 = COPY %8:gpr32
				RET_ReallyLR implicit $w0

				...
				---
				# A live (non-dead) definition of the flags register on the ADD blocks the transformation.

				# CHECK-LABEL: name: test6
				# CHECK: %6:gpr64 = ADDSXrr %5, %4, implicit-def $nzcv
				# CHECK-NEXT: %7:gpr64 = SUBXrr killed %3, killed %6

				name: test6
				registers:
				- { id: 0, class: gpr64common }
				- { id: 1, class: gpr64 }
				- { id: 2, class: gpr64 }
				- { id: 3, class: gpr64common }
				- { id: 4, class: gpr64common }
				- { id: 5, class: gpr64 }
				- { id: 6, class: gpr64 }
				- { id: 7, class: gpr64 }
				- { id: 8, class: gpr64 }
				body: \|
				bb.0:
				%2:gpr64 = COPY $x2
				%1:gpr64 = COPY $x1
				%0:gpr64common = COPY $x0
				%3:gpr64common = ORRXri %2:gpr64, 1600
				%4:gpr64common = ADDXri %0:gpr64common, 100, 0
				%5:gpr64 = EORXrs %1:gpr64, %4:gpr64common, 8
				%6:gpr64 = ADDSXrr %5:gpr64, %4:gpr64common, implicit-def $nzcv
				%7:gpr64 = SUBXrr killed %3:gpr64common, killed %6:gpr64
				%8:gpr64 = EORXrs killed %7:gpr64, %5:gpr64, 141
				$x0 = COPY %8:gpr64
				RET_ReallyLR implicit $x0

				...
				---
				# The ADD has multiple uses, so it must be kept in any case; the transformation should not be applied.

				# CHECK-LABEL: name: test7
				# CHECK: %6:gpr32 = ADDWrr %5, %4
				# CHECK-NEXT: %7:gpr32 = SUBWrr killed %3, %6

				name: test7
				registers:
				- { id: 0, class: gpr32common }
				- { id: 1, class: gpr32 }
				- { id: 2, class: gpr32 }
				- { id: 3, class: gpr32common }
				- { id: 4, class: gpr32common }
				- { id: 5, class: gpr32 }
				- { id: 6, class: gpr32 }
				- { id: 7, class: gpr32 }
				- { id: 8, class: gpr32 }
				- { id: 9, class: gpr32 }
				body: \|
				bb.0:
				%2:gpr32 = COPY $w2
				%1:gpr32 = COPY $w1
				%0:gpr32common = COPY $w0
				%3:gpr32common = ORRWri %2:gpr32, 1600
				%4:gpr32common = ADDWri %0:gpr32common, 100, 0
				%5:gpr32 = EORWrs %1:gpr32, %4:gpr32common, 8
				%6:gpr32 = ADDWrr %5:gpr32, %4:gpr32common
				%7:gpr32 = SUBWrr killed %3:gpr32common, %6:gpr32
				%8:gpr32 = EORWrs killed %7:gpr32, %5:gpr32, 141
				%9:gpr32 = ADDWrr %8:gpr32, %6:gpr32
				$w0 = COPY %9:gpr32
				RET_ReallyLR implicit $w0

				...

This is an archive of the discontinued LLVM Phabricator instance.

[MachineCombiner, AArch64] Add a new pattern A-(B+C) => (A-B)-C to reduce latency
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 440774

llvm/include/llvm/CodeGen/MachineCombinerPattern.h

llvm/lib/CodeGen/MachineCombiner.cpp

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll

llvm/test/CodeGen/AArch64/machine-combiner-subadd2.mir

This is an archive of the discontinued LLVM Phabricator instance.

[MachineCombiner, AArch64] Add a new pattern A-(B+C) => (A-B)-C to reduce latencyClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 440774

llvm/include/llvm/CodeGen/MachineCombinerPattern.h

llvm/lib/CodeGen/MachineCombiner.cpp

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll

llvm/test/CodeGen/AArch64/machine-combiner-subadd2.mir

[MachineCombiner, AArch64] Add a new pattern A-(B+C) => (A-B)-C to reduce latency
ClosedPublic