This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
lib/Target/X86/
-
Target/
-
X86/
2
X86OptimizeLEAs.cpp

Differential D15692

[X86] Reduce complexity of the LEA optimization pass
ClosedPublic

Authored by aturetsk on Dec 21 2015, 8:35 AM.

Download Raw Diff

Details

Reviewers

qcolombet
• dberlin

Commits

rL257328: [X86] Reduce complexity of the LEA optimization pass, by Andrey Turetsky.

Summary

In the OptimizeLEA pass, save each instruction's position in the basic block and use these positions to calculate the distance between two instructions instead of calling std::distance. This reduces the complexity of the pass from O(n^3) to O(n^2), and thus reduces the compile time.

Diff Detail

Event Timeline

aturetsk updated this revision to Diff 43377. Dec 21 2015, 8:35 AM

aturetsk retitled this revision from to [X86] Reduce complexity of the LEA optimization pass.

aturetsk updated this object.

aturetsk added reviewers: qcolombet, • dberlin.

aturetsk added a subscriber: llvm-commits.

Here are the results.

Just -Os (LEA pass is disabled):

$ time ./bin/clang++ -std=c++11 -S a.ll -Os

real    0m8.328s
user    0m8.282s
sys     0m0.041s

-Os with the old LEA pass:

$ time ./bin/clang++ -std=c++11 -S a.ll -Os -mllvm -enable-x86-lea-opt

real    0m11.653s
user    0m11.591s
sys     0m0.059s

-Os with the new LEA pass:

$ time ./bin/clang++ -std=c++11 -S a.ll -Os -mllvm -enable-x86-lea-opt

real    0m8.446s
user    0m8.380s
sys     0m0.064s

a.ll is taken from the example from https://llvm.org/bugs/show_bug.cgi?id=25843 and was generated this way:

$ python a.py 5000 > a.cc
$ ./bin/clang++ -std=c++11 -S -emit-llvm -Os a.cc
(this took about 1.5 hours)

Thanks for this! Just one tiny nit.

lib/Target/X86/X86OptimizeLEAs.cpp
91	Is there a reason that we can't use `DenseMap` here instead?

Replaced unordered_map with DenseMap.

Hello George,
Thanks for the review.

lib/Target/X86/X86OptimizeLEAs.cpp
91	Fixed.

qcolombet accepted this revision. Jan 6 2016, 11:26 AM

qcolombet edited edge metadata.

This revision is now accepted and ready to land. Jan 6 2016, 11:26 AM

Closed by commit rL257328: [X86] Reduce complexity of the LEA optimization pass, by Andrey Turetsky. (authored by ABataev). · Explain Why · Jan 11 2016, 3:56 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Target/

X86/

X86OptimizeLEAs.cpp

35 lines

Diff 43437

lib/Target/X86/X86OptimizeLEAs.cpp

Show First 20 Lines • Show All 73 Lines • ▼ Show 20 Lines	private:
/// \brief Returns true if two instructions have memory operands that only		/// \brief Returns true if two instructions have memory operands that only
/// differ by displacement. The numbers of the first memory operands for both		/// differ by displacement. The numbers of the first memory operands for both
/// instructions are specified through \p N1 and \p N2. The address		/// instructions are specified through \p N1 and \p N2. The address
/// displacement is returned through AddrDispShift.		/// displacement is returned through AddrDispShift.
bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1,		bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
const MachineInstr &MI2, unsigned N2,		const MachineInstr &MI2, unsigned N2,
int64_t &AddrDispShift);		int64_t &AddrDispShift);

/// \brief Find all LEA instructions in the basic block.		/// \brief Find all LEA instructions in the basic block. Also, assign position
		/// numbers to all instructions in the basic block to speed up calculation of
		/// distance between them.
void findLEAs(const MachineBasicBlock &MBB,		void findLEAs(const MachineBasicBlock &MBB,
SmallVectorImpl<MachineInstr *> &List);		SmallVectorImpl<MachineInstr *> &List);

/// \brief Removes redundant address calculations.		/// \brief Removes redundant address calculations.
bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);		bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);

		DenseMap<const MachineInstr *, unsigned> InstrPos;
		george.burgess.iv (inline comment, Unsubmitted, Not Done): Is there a reason that we can't use `DenseMap` here instead?
		aturetsk (Author; inline comment, Unsubmitted, Not Done): Fixed.

MachineRegisterInfo *MRI;		MachineRegisterInfo *MRI;
const X86InstrInfo *TII;		const X86InstrInfo *TII;
const X86RegisterInfo *TRI;		const X86RegisterInfo *TRI;

static char ID;		static char ID;
};		};
char OptimizeLEAPass::ID = 0;		char OptimizeLEAPass::ID = 0;
}		}

FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }		FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }

int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,		int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
const MachineInstr &Last) {		const MachineInstr &Last) {
const MachineBasicBlock *MBB = First.getParent();		// Both instructions must be in the same basic block and they must be
		// presented in InstrPos.
// Both instructions must be in the same basic block.		assert(Last.getParent() == First.getParent() &&
assert(Last.getParent() == MBB &&
"Instructions are in different basic blocks");		"Instructions are in different basic blocks");
		assert(InstrPos.find(&First) != InstrPos.end() &&
		InstrPos.find(&Last) != InstrPos.end() &&
		"Instructions' positions are undefined");

return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) -		return InstrPos[&Last] - InstrPos[&First];
std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First));
}		}

// Find the best LEA instruction in the List to replace address recalculation in		// Find the best LEA instruction in the List to replace address recalculation in
// MI. Such LEA must meet these requirements:		// MI. Such LEA must meet these requirements:
// 1) The address calculated by the LEA differs only by the displacement from		// 1) The address calculated by the LEA differs only by the displacement from
// the address used in MI.		// the address used in MI.
// 2) The register class of the definition of the LEA is compatible with the		// 2) The register class of the definition of the LEA is compatible with the
// register class of the address base register of MI.		// register class of the address base register of MI.
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	else
return false;		return false;
}		}

return true;		return true;
}		}

void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,		void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
SmallVectorImpl<MachineInstr *> &List) {		SmallVectorImpl<MachineInstr *> &List) {
		unsigned Pos = 0;
for (auto &MI : MBB) {		for (auto &MI : MBB) {
		// Assign the position number to the instruction. Note that we are going to
		// move some instructions during the optimization however there will never
		// be a need to move two instructions before any selected instruction. So to
		// avoid multiple positions' updates during moves we just increase position
		// counter by two leaving a free space for instructions which will be moved.
		InstrPos[&MI] = Pos += 2;

if (isLEA(MI))		if (isLEA(MI))
List.push_back(const_cast<MachineInstr *>(&MI));		List.push_back(const_cast<MachineInstr *>(&MI));
}		}
}		}

// Try to find load and store instructions which recalculate addresses already		// Try to find load and store instructions which recalculate addresses already
// calculated by some LEA and replace their memory operands with its def		// calculated by some LEA and replace their memory operands with its def
// register.		// register.
Show All 34 Lines	for (auto I = MBB->begin(), E = MBB->end(); I != E;) {
// the instruction. If LEA occurs after, we can lift LEA above the		// the instruction. If LEA occurs after, we can lift LEA above the
// instruction and this way to be able to replace it. Since LEA and the		// instruction and this way to be able to replace it. Since LEA and the
// instruction have similar memory operands (thus, the same def		// instruction have similar memory operands (thus, the same def
// instructions for these operands), we can always do that, without		// instructions for these operands), we can always do that, without
// worries of using registers before their defs.		// worries of using registers before their defs.
if (Dist < 0) {		if (Dist < 0) {
DefMI->removeFromParent();		DefMI->removeFromParent();
MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);		MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
		InstrPos[DefMI] = InstrPos[&MI] - 1;

		// Make sure the instructions' position numbers are sane.
		assert(((InstrPos[DefMI] == 1 && DefMI == MBB->begin()) \|\|
		InstrPos[DefMI] >
		InstrPos[std::prev(MachineBasicBlock::iterator(DefMI))]) &&
		"Instruction positioning is broken");
}		}

// Since we can possibly extend register lifetime, clear kill flags.		// Since we can possibly extend register lifetime, clear kill flags.
MRI->clearKillFlags(DefMI->getOperand(0).getReg());		MRI->clearKillFlags(DefMI->getOperand(0).getReg());

++NumSubstLEAs;		++NumSubstLEAs;
DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump(););		DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump(););

Show All 24 Lines	bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {

MRI = &MF.getRegInfo();		MRI = &MF.getRegInfo();
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();		TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();		TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();

// Process all basic blocks.		// Process all basic blocks.
for (auto &MBB : MF) {		for (auto &MBB : MF) {
SmallVector<MachineInstr *, 16> LEAs;		SmallVector<MachineInstr *, 16> LEAs;
		InstrPos.clear();

// Find all LEA instructions in basic block.		// Find all LEA instructions in basic block.
findLEAs(MBB, LEAs);		findLEAs(MBB, LEAs);

// If current basic block has no LEAs, move on to the next one.		// If current basic block has no LEAs, move on to the next one.
if (LEAs.empty())		if (LEAs.empty())
continue;		continue;

// Remove redundant address calculations.		// Remove redundant address calculations.
Changed \|= removeRedundantAddrCalc(LEAs);		Changed \|= removeRedundantAddrCalc(LEAs);
}		}

return Changed;		return Changed;
}		}