Skip to content

Commit a0aa41d

Browse files
author
Zhaoshi Zheng
committedSep 4, 2018
Revert "Revert r341269: [Constant Hoisting] Hoisting Constant GEP Expressions"
Reland r341269. Use std::stable_sort when sorting constant condidates. Reverting commit, r341365: Revert r341269: [Constant Hoisting] Hoisting Constant GEP Expressions One of the tests is failing 50% of the time when expensive checks are enabled. Not sure how deep the problem is so just reverting while the author can investigate so that the bots stop repeatedly failing and blaming things incorrectly. Will respond with details on the original commit. Original commit, r341269: [Constant Hoisting] Hoisting Constant GEP Expressions Leverage existing logic in constant hoisting pass to transform constant GEP expressions sharing the same base global variable. Multi-dimensional GEPs are rewritten into single-dimensional GEPs. https://reviews.llvm.org/D51396 Differential Revision: https://reviews.llvm.org/D51654 llvm-svn: 341417
1 parent dbacea1 commit a0aa41d

File tree

6 files changed

+418
-53
lines changed

6 files changed

+418
-53
lines changed
 

‎llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h

+51-16
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
3939

4040
#include "llvm/ADT/DenseMap.h"
41+
#include "llvm/ADT/PointerUnion.h"
4142
#include "llvm/ADT/SmallPtrSet.h"
4243
#include "llvm/ADT/SmallVector.h"
4344
#include "llvm/IR/PassManager.h"
@@ -50,8 +51,10 @@ class BasicBlock;
5051
class BlockFrequencyInfo;
5152
class Constant;
5253
class ConstantInt;
54+
class ConstantExpr;
5355
class DominatorTree;
5456
class Function;
57+
class GlobalVariable;
5558
class Instruction;
5659
class TargetTransformInfo;
5760

@@ -74,10 +77,15 @@ using ConstantUseListType = SmallVector<ConstantUser, 8>;
7477
/// Keeps track of a constant candidate and its uses.
7578
struct ConstantCandidate {
7679
ConstantUseListType Uses;
80+
// If the candidate is a ConstantExpr (currely only constant GEP expressions
81+
// whose base pointers are GlobalVariables are supported), ConstInt records
82+
// its offset from the base GV, ConstExpr tracks the candidate GEP expr.
7783
ConstantInt *ConstInt;
84+
ConstantExpr *ConstExpr;
7885
unsigned CumulativeCost = 0;
7986

80-
ConstantCandidate(ConstantInt *ConstInt) : ConstInt(ConstInt) {}
87+
ConstantCandidate(ConstantInt *ConstInt, ConstantExpr *ConstExpr=nullptr) :
88+
ConstInt(ConstInt), ConstExpr(ConstExpr) {}
8189

8290
/// Add the user to the use list and update the cost.
8391
void addUser(Instruction *Inst, unsigned Idx, unsigned Cost) {
@@ -91,16 +99,21 @@ struct ConstantCandidate {
9199
struct RebasedConstantInfo {
92100
ConstantUseListType Uses;
93101
Constant *Offset;
102+
Type *Ty;
94103

95-
RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset)
96-
: Uses(std::move(Uses)), Offset(Offset) {}
104+
RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset,
105+
Type *Ty=nullptr) : Uses(std::move(Uses)), Offset(Offset), Ty(Ty) {}
97106
};
98107

99108
using RebasedConstantListType = SmallVector<RebasedConstantInfo, 4>;
100109

101110
/// A base constant and all its rebased constants.
102111
struct ConstantInfo {
103-
ConstantInt *BaseConstant;
112+
// If the candidate is a ConstantExpr (currely only constant GEP expressions
113+
// whose base pointers are GlobalVariables are supported), ConstInt records
114+
// its offset from the base GV, ConstExpr tracks the candidate GEP expr.
115+
ConstantInt *BaseInt;
116+
ConstantExpr *BaseExpr;
104117
RebasedConstantListType RebasedConstants;
105118
};
106119

@@ -115,49 +128,71 @@ class ConstantHoistingPass : public PassInfoMixin<ConstantHoistingPass> {
115128
BlockFrequencyInfo *BFI, BasicBlock &Entry);
116129

117130
void releaseMemory() {
118-
ConstantVec.clear();
119131
ClonedCastMap.clear();
120-
ConstCandVec.clear();
132+
ConstIntCandVec.clear();
133+
for (auto MapEntry : ConstGEPCandMap)
134+
MapEntry.second.clear();
135+
ConstGEPCandMap.clear();
136+
ConstIntInfoVec.clear();
137+
for (auto MapEntry : ConstGEPInfoMap)
138+
MapEntry.second.clear();
139+
ConstGEPInfoMap.clear();
121140
}
122141

123142
private:
124-
using ConstCandMapType = DenseMap<ConstantInt *, unsigned>;
125-
using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
143+
using ConstPtrUnionType = PointerUnion<ConstantInt *, ConstantExpr *>;
144+
using ConstCandMapType = DenseMap<ConstPtrUnionType, unsigned>;
126145

127146
const TargetTransformInfo *TTI;
128147
DominatorTree *DT;
129148
BlockFrequencyInfo *BFI;
149+
LLVMContext *Ctx;
150+
const DataLayout *DL;
130151
BasicBlock *Entry;
131152

132153
/// Keeps track of constant candidates found in the function.
133-
ConstCandVecType ConstCandVec;
154+
using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
155+
using GVCandVecMapType = DenseMap<GlobalVariable *, ConstCandVecType>;
156+
ConstCandVecType ConstIntCandVec;
157+
GVCandVecMapType ConstGEPCandMap;
158+
159+
/// These are the final constants we decided to hoist.
160+
using ConstInfoVecType = SmallVector<consthoist::ConstantInfo, 8>;
161+
using GVInfoVecMapType = DenseMap<GlobalVariable *, ConstInfoVecType>;
162+
ConstInfoVecType ConstIntInfoVec;
163+
GVInfoVecMapType ConstGEPInfoMap;
134164

135165
/// Keep track of cast instructions we already cloned.
136166
SmallDenseMap<Instruction *, Instruction *> ClonedCastMap;
137167

138-
/// These are the final constants we decided to hoist.
139-
SmallVector<consthoist::ConstantInfo, 8> ConstantVec;
140-
141168
Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
142169
SmallPtrSet<Instruction *, 8>
143170
findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const;
144171
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
145172
Instruction *Inst, unsigned Idx,
146173
ConstantInt *ConstInt);
174+
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
175+
Instruction *Inst, unsigned Idx,
176+
ConstantExpr *ConstExpr);
147177
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
148178
Instruction *Inst, unsigned Idx);
149179
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
150180
Instruction *Inst);
151181
void collectConstantCandidates(Function &Fn);
152182
void findAndMakeBaseConstant(ConstCandVecType::iterator S,
153-
ConstCandVecType::iterator E);
183+
ConstCandVecType::iterator E,
184+
SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec);
154185
unsigned maximizeConstantsInRange(ConstCandVecType::iterator S,
155186
ConstCandVecType::iterator E,
156187
ConstCandVecType::iterator &MaxCostItr);
157-
void findBaseConstants();
158-
void emitBaseConstants(Instruction *Base, Constant *Offset,
188+
// If BaseGV is nullptr, find base among Constant Integer candidates;
189+
// otherwise find base among constant GEPs sharing BaseGV as base pointer.
190+
void findBaseConstants(GlobalVariable *BaseGV);
191+
void emitBaseConstants(Instruction *Base, Constant *Offset, Type *Ty,
159192
const consthoist::ConstantUser &ConstUser);
160-
bool emitBaseConstants();
193+
// If BaseGV is nullptr, emit Constant Integer base; otherwise emit
194+
// constant GEP base.
195+
bool emitBaseConstants(GlobalVariable *BaseGV);
161196
void deleteDeadCastInst() const;
162197
bool optimizeConstants(Function &Fn);
163198
};

‎llvm/lib/Transforms/Scalar/ConstantHoisting.cpp

+137-37
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ static cl::opt<bool> ConstHoistWithBlockFrequency(
8282
"chance to execute const materialization more frequently than "
8383
"without hoisting."));
8484

85+
static cl::opt<bool> ConstHoistGEP(
86+
"consthoist-gep", cl::init(false), cl::Hidden,
87+
cl::desc("Try hoisting constant gep expressions"));
88+
8589
namespace {
8690

8791
/// The constant hoisting pass.
@@ -340,7 +344,7 @@ SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
340344
///
341345
/// The operand at index Idx is not necessarily the constant integer itself. It
342346
/// could also be a cast instruction or a constant expression that uses the
343-
// constant integer.
347+
/// constant integer.
344348
void ConstantHoistingPass::collectConstantCandidates(
345349
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
346350
ConstantInt *ConstInt) {
@@ -358,12 +362,13 @@ void ConstantHoistingPass::collectConstantCandidates(
358362
if (Cost > TargetTransformInfo::TCC_Basic) {
359363
ConstCandMapType::iterator Itr;
360364
bool Inserted;
361-
std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(ConstInt, 0));
365+
ConstPtrUnionType Cand = ConstInt;
366+
std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));
362367
if (Inserted) {
363-
ConstCandVec.push_back(ConstantCandidate(ConstInt));
364-
Itr->second = ConstCandVec.size() - 1;
368+
ConstIntCandVec.push_back(ConstantCandidate(ConstInt));
369+
Itr->second = ConstIntCandVec.size() - 1;
365370
}
366-
ConstCandVec[Itr->second].addUser(Inst, Idx, Cost);
371+
ConstIntCandVec[Itr->second].addUser(Inst, Idx, Cost);
367372
LLVM_DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx))) dbgs()
368373
<< "Collect constant " << *ConstInt << " from " << *Inst
369374
<< " with cost " << Cost << '\n';
@@ -374,6 +379,48 @@ void ConstantHoistingPass::collectConstantCandidates(
374379
}
375380
}
376381

382+
/// Record constant GEP expression for instruction Inst at operand index Idx.
383+
void ConstantHoistingPass::collectConstantCandidates(
384+
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
385+
ConstantExpr *ConstExpr) {
386+
// TODO: Handle vector GEPs
387+
if (ConstExpr->getType()->isVectorTy())
388+
return;
389+
390+
GlobalVariable *BaseGV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
391+
if (!BaseGV)
392+
return;
393+
394+
// Get offset from the base GV.
395+
PointerType *GVPtrTy = dyn_cast<PointerType>(BaseGV->getType());
396+
IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace());
397+
APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true);
398+
auto *GEPO = cast<GEPOperator>(ConstExpr);
399+
if (!GEPO->accumulateConstantOffset(*DL, Offset))
400+
return;
401+
402+
if (!Offset.isIntN(32))
403+
return;
404+
405+
// A constant GEP expression that has a GlobalVariable as base pointer is
406+
// usually lowered to a load from constant pool. Such operation is unlikely
407+
// to be cheaper than compute it by <Base + Offset>, which can be lowered to
408+
// an ADD instruction or folded into Load/Store instruction.
409+
int Cost = TTI->getIntImmCost(Instruction::Add, 1, Offset, PtrIntTy);
410+
ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
411+
ConstCandMapType::iterator Itr;
412+
bool Inserted;
413+
ConstPtrUnionType Cand = ConstExpr;
414+
std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));
415+
if (Inserted) {
416+
ExprCandVec.push_back(ConstantCandidate(
417+
ConstantInt::get(Type::getInt32Ty(*Ctx), Offset.getLimitedValue()),
418+
ConstExpr));
419+
Itr->second = ExprCandVec.size() - 1;
420+
}
421+
ExprCandVec[Itr->second].addUser(Inst, Idx, Cost);
422+
}
423+
377424
/// Check the operand for instruction Inst at index Idx.
378425
void ConstantHoistingPass::collectConstantCandidates(
379426
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) {
@@ -402,6 +449,10 @@ void ConstantHoistingPass::collectConstantCandidates(
402449

403450
// Visit constant expressions that have constant integers.
404451
if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
452+
// Handle constant gep expressions.
453+
if (ConstHoistGEP && ConstExpr->isGEPWithNoNotionalOverIndexing())
454+
collectConstantCandidates(ConstCandMap, Inst, Idx, ConstExpr);
455+
405456
// Only visit constant cast expressions.
406457
if (!ConstExpr->isCast())
407458
return;
@@ -544,34 +595,46 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
544595
/// Find the base constant within the given range and rebase all other
545596
/// constants with respect to the base constant.
546597
void ConstantHoistingPass::findAndMakeBaseConstant(
547-
ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
598+
ConstCandVecType::iterator S, ConstCandVecType::iterator E,
599+
SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec) {
548600
auto MaxCostItr = S;
549601
unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
550602

551603
// Don't hoist constants that have only one use.
552604
if (NumUses <= 1)
553605
return;
554606

607+
ConstantInt *ConstInt = MaxCostItr->ConstInt;
608+
ConstantExpr *ConstExpr = MaxCostItr->ConstExpr;
555609
ConstantInfo ConstInfo;
556-
ConstInfo.BaseConstant = MaxCostItr->ConstInt;
557-
Type *Ty = ConstInfo.BaseConstant->getType();
610+
ConstInfo.BaseInt = ConstInt;
611+
ConstInfo.BaseExpr = ConstExpr;
612+
Type *Ty = ConstInt->getType();
558613

559614
// Rebase the constants with respect to the base constant.
560615
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
561-
APInt Diff = ConstCand->ConstInt->getValue() -
562-
ConstInfo.BaseConstant->getValue();
616+
APInt Diff = ConstCand->ConstInt->getValue() - ConstInt->getValue();
563617
Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff);
618+
Type *ConstTy =
619+
ConstCand->ConstExpr ? ConstCand->ConstExpr->getType() : nullptr;
564620
ConstInfo.RebasedConstants.push_back(
565-
RebasedConstantInfo(std::move(ConstCand->Uses), Offset));
621+
RebasedConstantInfo(std::move(ConstCand->Uses), Offset, ConstTy));
566622
}
567-
ConstantVec.push_back(std::move(ConstInfo));
623+
ConstInfoVec.push_back(std::move(ConstInfo));
568624
}
569625

570626
/// Finds and combines constant candidates that can be easily
571627
/// rematerialized with an add from a common base constant.
572-
void ConstantHoistingPass::findBaseConstants() {
628+
void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
629+
// If BaseGV is nullptr, find base among candidate constant integers;
630+
// Otherwise find base among constant GEPs that share the same BaseGV.
631+
ConstCandVecType &ConstCandVec = BaseGV ?
632+
ConstGEPCandMap[BaseGV] : ConstIntCandVec;
633+
ConstInfoVecType &ConstInfoVec = BaseGV ?
634+
ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
635+
573636
// Sort the constants by value and type. This invalidates the mapping!
574-
llvm::sort(ConstCandVec.begin(), ConstCandVec.end(),
637+
std::stable_sort(ConstCandVec.begin(), ConstCandVec.end(),
575638
[](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
576639
if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
577640
return LHS.ConstInt->getType()->getBitWidth() <
@@ -613,12 +676,12 @@ void ConstantHoistingPass::findBaseConstants() {
613676
}
614677
// We either have now a different constant type or the constant is not in
615678
// range of an add with immediate anymore.
616-
findAndMakeBaseConstant(MinValItr, CC);
679+
findAndMakeBaseConstant(MinValItr, CC, ConstInfoVec);
617680
// Start a new base constant search.
618681
MinValItr = CC;
619682
}
620683
// Finalize the last base constant search.
621-
findAndMakeBaseConstant(MinValItr, ConstCandVec.end());
684+
findAndMakeBaseConstant(MinValItr, ConstCandVec.end(), ConstInfoVec);
622685
}
623686

624687
/// Updates the operand at Idx in instruction Inst with the result of
@@ -653,12 +716,28 @@ static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
653716
/// users.
654717
void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
655718
Constant *Offset,
719+
Type *Ty,
656720
const ConstantUser &ConstUser) {
657721
Instruction *Mat = Base;
722+
723+
// The same offset can be dereferenced to different types in nested struct.
724+
if (!Offset && Ty && Ty != Base->getType())
725+
Offset = ConstantInt::get(Type::getInt32Ty(*Ctx), 0);
726+
658727
if (Offset) {
659728
Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
660729
ConstUser.OpndIdx);
661-
Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
730+
if (Ty) {
731+
// Constant being rebased is a ConstantExpr.
732+
PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx,
733+
cast<PointerType>(Ty)->getAddressSpace());
734+
Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt);
735+
Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base,
736+
Offset, "mat_gep", InsertionPt);
737+
Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt);
738+
} else
739+
// Constant being rebased is a ConstantInt.
740+
Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
662741
"const_mat", InsertionPt);
663742

664743
LLVM_DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
@@ -702,6 +781,14 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
702781

703782
// Visit constant expression.
704783
if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
784+
if (ConstExpr->isGEPWithNoNotionalOverIndexing()) {
785+
// Operand is a ConstantGEP, replace it.
786+
updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat);
787+
return;
788+
}
789+
790+
// Aside from constant GEPs, only constant cast expressions are collected.
791+
assert(ConstExpr->isCast() && "ConstExpr should be a cast");
705792
Instruction *ConstExprInst = ConstExpr->getAsInstruction();
706793
ConstExprInst->setOperand(0, Mat);
707794
ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
@@ -725,23 +812,31 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
725812

726813
/// Hoist and hide the base constant behind a bitcast and emit
727814
/// materialization code for derived constants.
728-
bool ConstantHoistingPass::emitBaseConstants() {
815+
bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
729816
bool MadeChange = false;
730-
for (auto const &ConstInfo : ConstantVec) {
731-
// Hoist and hide the base constant behind a bitcast.
817+
SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec =
818+
BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
819+
for (auto const &ConstInfo : ConstInfoVec) {
732820
SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
733821
assert(!IPSet.empty() && "IPSet is empty");
734822

735823
unsigned UsesNum = 0;
736824
unsigned ReBasesNum = 0;
737825
for (Instruction *IP : IPSet) {
738-
IntegerType *Ty = ConstInfo.BaseConstant->getType();
739-
Instruction *Base =
740-
new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
826+
Instruction *Base = nullptr;
827+
// Hoist and hide the base constant behind a bitcast.
828+
if (ConstInfo.BaseExpr) {
829+
assert(BaseGV && "A base constant expression must have an base GV");
830+
Type *Ty = ConstInfo.BaseExpr->getType();
831+
Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP);
832+
} else {
833+
IntegerType *Ty = ConstInfo.BaseInt->getType();
834+
Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP);
835+
}
741836

742837
Base->setDebugLoc(IP->getDebugLoc());
743838

744-
LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant
839+
LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseInt
745840
<< ") to BB " << IP->getParent()->getName() << '\n'
746841
<< *Base << '\n');
747842

@@ -756,11 +851,12 @@ bool ConstantHoistingPass::emitBaseConstants() {
756851
// generate rebase for U using the Base dominating U.
757852
if (IPSet.size() == 1 ||
758853
DT->dominates(Base->getParent(), OrigMatInsertBB)) {
759-
emitBaseConstants(Base, RCI.Offset, U);
854+
emitBaseConstants(Base, RCI.Offset, RCI.Ty, U);
760855
ReBasesNum++;
761856
}
762857

763-
Base->setDebugLoc(DILocation::getMergedLocation(Base->getDebugLoc(), U.Inst->getDebugLoc()));
858+
Base->setDebugLoc(DILocation::getMergedLocation(
859+
Base->getDebugLoc(), U.Inst->getDebugLoc()));
764860
}
765861
}
766862
UsesNum = Uses;
@@ -779,7 +875,7 @@ bool ConstantHoistingPass::emitBaseConstants() {
779875

780876
// Base constant is also included in ConstInfo.RebasedConstants, so
781877
// deduct 1 from ConstInfo.RebasedConstants.size().
782-
NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1;
878+
NumConstantsRebased += ConstInfo.RebasedConstants.size() - 1;
783879

784880
MadeChange = true;
785881
}
@@ -801,25 +897,29 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
801897
this->TTI = &TTI;
802898
this->DT = &DT;
803899
this->BFI = BFI;
900+
this->DL = &Fn.getParent()->getDataLayout();
901+
this->Ctx = &Fn.getContext();
804902
this->Entry = &Entry;
805903
// Collect all constant candidates.
806904
collectConstantCandidates(Fn);
807905

808-
// There are no constant candidates to worry about.
809-
if (ConstCandVec.empty())
810-
return false;
811-
812906
// Combine constants that can be easily materialized with an add from a common
813907
// base constant.
814-
findBaseConstants();
815-
816-
// There are no constants to emit.
817-
if (ConstantVec.empty())
818-
return false;
908+
if (!ConstIntCandVec.empty())
909+
findBaseConstants(nullptr);
910+
for (auto &MapEntry : ConstGEPCandMap)
911+
if (!MapEntry.second.empty())
912+
findBaseConstants(MapEntry.first);
819913

820914
// Finally hoist the base constant and emit materialization code for dependent
821915
// constants.
822-
bool MadeChange = emitBaseConstants();
916+
bool MadeChange = false;
917+
if (!ConstIntInfoVec.empty())
918+
MadeChange = emitBaseConstants(nullptr);
919+
for (auto MapEntry : ConstGEPInfoMap)
920+
if (!MapEntry.second.empty())
921+
MadeChange |= emitBaseConstants(MapEntry.first);
922+
823923

824924
// Cleanup dead instructions.
825925
deleteDeadCastInst();
+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; RUN: llc -mtriple=aarch64-none-unknown-linuxeabi -consthoist-gep %s -o - | FileCheck %s
2+
3+
; CHECK-NOT: adrp x10, global+332
4+
; CHECK-NOT: add x10, x10, :lo12:global+332
5+
; CHECK: adrp x10, global+528
6+
; CHECK-NEXT: add x10, x10, :lo12:global+528
7+
8+
%struct.blam = type { %struct.bar, %struct.bar.0, %struct.wobble, %struct.wombat, i8, i16, %struct.snork.2, %struct.foo, %struct.snork.3, %struct.wobble.4, %struct.quux, [9 x i16], %struct.spam, %struct.zot }
9+
%struct.bar = type { i8, i8, %struct.snork }
10+
%struct.snork = type { i16, i8, i8 }
11+
%struct.bar.0 = type { i8, i8, i16, i8, i8, %struct.barney }
12+
%struct.barney = type { i8, i8, i8, i8 }
13+
%struct.wobble = type { i8, i8, %struct.eggs, %struct.bar.1 }
14+
%struct.eggs = type { i8, i8, i8 }
15+
%struct.bar.1 = type { i8, i8, i8, i8 }
16+
%struct.wombat = type { i8, i8, i16, i32, i32, i32, i32 }
17+
%struct.snork.2 = type { i8, i8, i8 }
18+
%struct.foo = type { [12 x i32], [12 x i32], [4 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
19+
%struct.snork.3 = type { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16 }
20+
%struct.wobble.4 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 }
21+
%struct.quux = type { i32, %struct.foo.5, i8, i8, i8, i8, i32, %struct.snork.6, %struct.foo.7, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 }
22+
%struct.foo.5 = type { i16, i8, i8 }
23+
%struct.snork.6 = type { i16, i8, i8 }
24+
%struct.foo.7 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
25+
%struct.spam = type { i8, i8 }
26+
%struct.zot = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 }
27+
28+
@global = external dso_local local_unnamed_addr global %struct.blam, align 4
29+
30+
; Function Attrs: norecurse nounwind optsize ssp
31+
define dso_local void @blam() local_unnamed_addr #0 {
32+
bb:
33+
%tmp = load i8, i8* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 7, i32 9), align 2, !tbaa !3
34+
%tmp1 = and i8 %tmp, 1
35+
%tmp2 = icmp eq i8 %tmp1, 0
36+
br i1 %tmp2, label %bb3, label %bb19
37+
38+
bb3: ; preds = %bb
39+
%tmp4 = load volatile i32, i32* inttoptr (i32 805874688 to i32*), align 1024, !tbaa !23
40+
store i32 %tmp4, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 0), align 4, !tbaa !23
41+
%tmp5 = load volatile i32, i32* inttoptr (i32 805874692 to i32*), align 4, !tbaa !23
42+
%tmp6 = and i32 %tmp5, 65535
43+
store i32 %tmp6, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 1), align 4, !tbaa !23
44+
%tmp7 = load volatile i32, i32* inttoptr (i32 805874696 to i32*), align 8, !tbaa !23
45+
%tmp8 = and i32 %tmp7, 522133279
46+
store i32 %tmp8, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 2), align 4, !tbaa !23
47+
%tmp9 = load volatile i32, i32* inttoptr (i32 805874700 to i32*), align 4, !tbaa !23
48+
%tmp10 = and i32 %tmp9, 522133279
49+
store i32 %tmp10, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 3), align 4, !tbaa !23
50+
%tmp11 = load volatile i32, i32* inttoptr (i32 805874860 to i32*), align 4, !tbaa !23
51+
%tmp12 = and i32 %tmp11, 16777215
52+
store i32 %tmp12, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 15), align 4, !tbaa !24
53+
%tmp13 = load volatile i32, i32* inttoptr (i32 805874864 to i32*), align 16, !tbaa !23
54+
%tmp14 = and i32 %tmp13, 16777215
55+
store i32 %tmp14, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 16), align 4, !tbaa !25
56+
%tmp15 = load volatile i32, i32* inttoptr (i32 805874868 to i32*), align 4, !tbaa !23
57+
%tmp16 = and i32 %tmp15, 16777215
58+
store i32 %tmp16, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 17), align 4, !tbaa !26
59+
%tmp17 = load volatile i32, i32* inttoptr (i32 805874872 to i32*), align 8, !tbaa !23
60+
%tmp18 = and i32 %tmp17, 16777215
61+
store i32 %tmp18, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 18), align 4, !tbaa !27
62+
br label %bb19
63+
64+
bb19: ; preds = %bb3, %bb
65+
ret void
66+
}
67+
68+
attributes #0 = { norecurse nounwind optsize ssp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
69+
70+
!llvm.module.flags = !{!0, !1}
71+
!llvm.ident = !{!2}
72+
73+
!0 = !{i32 1, !"wchar_size", i32 4}
74+
!1 = !{i32 1, !"min_enum_size", i32 1}
75+
!2 = !{!"Snapdragon LLVM ARM Compiler 8.0.0 (based on LLVM 8.0.0)"}
76+
!3 = !{!4, !6, i64 174}
77+
!4 = !{!"", !5, i64 0, !10, i64 6, !12, i64 16, !14, i64 28, !6, i64 48, !9, i64 50, !13, i64 52, !16, i64 56, !17, i64 176, !18, i64 196, !19, i64 240, !6, i64 312, !21, i64 330, !22, i64 332}
78+
!5 = !{!"", !6, i64 0, !6, i64 1, !8, i64 2}
79+
!6 = !{!"omnipotent char", !7, i64 0}
80+
!7 = !{!"Simple C/C++ TBAA"}
81+
!8 = !{!"", !9, i64 0, !6, i64 2, !6, i64 3}
82+
!9 = !{!"short", !6, i64 0}
83+
!10 = !{!"", !6, i64 0, !6, i64 1, !9, i64 2, !6, i64 4, !6, i64 5, !11, i64 6}
84+
!11 = !{!"", !6, i64 0, !6, i64 1, !6, i64 2, !6, i64 3}
85+
!12 = !{!"", !6, i64 0, !6, i64 1, !13, i64 2, !11, i64 5}
86+
!13 = !{!"", !6, i64 0, !6, i64 1, !6, i64 2}
87+
!14 = !{!"", !6, i64 0, !6, i64 1, !9, i64 2, !15, i64 4, !15, i64 8, !15, i64 12, !15, i64 16}
88+
!15 = !{!"long", !6, i64 0}
89+
!16 = !{!"", !6, i64 0, !6, i64 48, !6, i64 96, !6, i64 112, !6, i64 113, !6, i64 114, !6, i64 115, !6, i64 116, !6, i64 117, !6, i64 118, !6, i64 119}
90+
!17 = !{!"", !9, i64 0, !6, i64 2, !6, i64 3, !6, i64 4, !6, i64 5, !6, i64 6, !6, i64 7, !6, i64 8, !6, i64 9, !6, i64 10, !6, i64 11, !6, i64 12, !6, i64 13, !6, i64 14, !6, i64 15, !9, i64 16}
91+
!18 = !{!"", !15, i64 0, !15, i64 4, !15, i64 8, !15, i64 12, !15, i64 16, !15, i64 20, !9, i64 24, !9, i64 26, !6, i64 28, !6, i64 29, !9, i64 30, !15, i64 32, !15, i64 36, !9, i64 40, !6, i64 42, !6, i64 43}
92+
!19 = !{!"", !15, i64 0, !8, i64 4, !6, i64 8, !6, i64 9, !6, i64 10, !6, i64 11, !15, i64 12, !8, i64 16, !20, i64 20, !6, i64 36, !9, i64 52, !9, i64 54, !6, i64 56, !6, i64 57, !6, i64 58, !6, i64 59, !15, i64 60, !15, i64 64, !15, i64 68}
93+
!20 = !{!"", !6, i64 0, !6, i64 1, !6, i64 2, !6, i64 3, !6, i64 4, !6, i64 5, !6, i64 6, !6, i64 7, !6, i64 8, !6, i64 9, !6, i64 10, !6, i64 11, !6, i64 12, !6, i64 13, !6, i64 14, !6, i64 15}
94+
!21 = !{!"", !6, i64 0, !6, i64 1}
95+
!22 = !{!"", !6, i64 0, !6, i64 20, !6, i64 32, !6, i64 56, !6, i64 68, !6, i64 76, !6, i64 92, !6, i64 104, !6, i64 112, !6, i64 128, !6, i64 148, !6, i64 160, !6, i64 184, !15, i64 188, !15, i64 192, !15, i64 196, !15, i64 200, !15, i64 204, !15, i64 208}
96+
!23 = !{!15, !15, i64 0}
97+
!24 = !{!4, !15, i64 528}
98+
!25 = !{!4, !15, i64 532}
99+
!26 = !{!4, !15, i64 536}
100+
!27 = !{!4, !15, i64 540}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s
2+
3+
target triple = "aarch64-none--musleabi"
4+
5+
; Check that constant GEP expressions are rewritten to one-dimensional
6+
; (single-index) GEPs, whose base poiner is a multi-dimensional GEP.
7+
; CHECK: %const = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0) to i32*
8+
; CHECK-NEXT: store i32 undef, i32* %const, align 4
9+
10+
; CHECK-NEXT: %[[BC1:[a-z0-9_]+]] = bitcast i32* %const to i8*
11+
; CHECK-NEXT: %[[M1:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC1]], i32 4
12+
; CHECK-NEXT: %[[BC2:[a-z0-9_]+]] = bitcast i8* %[[M1]] to i32*
13+
; CHECK-NEXT: store i32 undef, i32* %[[BC2]], align 4
14+
15+
; CHECK-NEXT: %[[BC3:[a-z0-9_]+]] = bitcast i32* %const to i8*
16+
; CHECK-NEXT: %[[M2:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC3]], i32 160
17+
; CHECK-NEXT: %[[BC4:[a-z0-9_]+]] = bitcast i8* %[[M2]] to i32*
18+
; CHECK-NEXT: store i32 undef, i32* %[[BC4]], align 4
19+
20+
; CHECK-NEXT: %[[BC5:[a-z0-9_]+]] = bitcast i32* %const to i8*
21+
; CHECK-NEXT: %[[M3:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC5]], i32 164
22+
; CHECK-NEXT: %[[BC6:[a-z0-9_]+]] = bitcast i8* %[[M3]] to i32*
23+
; CHECK-NEXT: store i32 undef, i32* %[[BC6]], align 4
24+
25+
%0 = type { %1, %2, [9 x i16], %6, %7 }
26+
%1 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 }
27+
%2 = type { i32, %3, i8, i8, i8, i8, i32, %4, %5, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 }
28+
%3 = type { i16, i8, i8 }
29+
%4 = type { i16, i8, i8 }
30+
%5 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
31+
%6 = type { i8, i8 }
32+
%7 = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 }
33+
34+
@global = external dso_local local_unnamed_addr global %0, align 4
35+
36+
define dso_local void @zot() {
37+
bb:
38+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0), align 4
39+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 1), align 4
40+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0), align 4
41+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 1), align 4
42+
ret void
43+
}
44+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s
2+
3+
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
4+
target triple = "thumbv6m-none--musleabi"
5+
6+
; Check that constant GEP expressions are rewritten to one-dimensional
7+
; (single-index) GEPs, whose base poiner is a multi-dimensional GEP.
8+
; CHECK-DAG: %[[C1:const[0-9]?]] = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0) to i32*
9+
; CHECK-DAG: %[[C2:const[0-9]?]] = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0) to i32*
10+
11+
; CHECK: store i32 undef, i32* %[[C2]], align 4
12+
; CHECK-NEXT: %[[BC1:[a-z0-9_]+]] = bitcast i32* %[[C2]] to i8*
13+
; CHECK-NEXT: %[[M1:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC1]], i32 4
14+
; CHECK-NEXT: %[[BC2:[a-z0-9_]+]] = bitcast i8* %[[M1]] to i32*
15+
; CHECK-NEXT: store i32 undef, i32* %[[BC2]], align 4
16+
17+
; CHECK-NEXT: store i32 undef, i32* %[[C1]], align 4
18+
; CHECK-NEXT: %[[BC3:[a-z0-9_]+]] = bitcast i32* %[[C1]] to i8*
19+
; CHECK-NEXT: %[[M2:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC3]], i32 4
20+
; CHECK-NEXT: %[[BC4:[a-z0-9_]+]] = bitcast i8* %[[M2]] to i32*
21+
; CHECK-NEXT: store i32 undef, i32* %[[BC4]], align 4
22+
23+
%0 = type { %1, %2, [9 x i16], %6, %7 }
24+
%1 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 }
25+
%2 = type { i32, %3, i8, i8, i8, i8, i32, %4, %5, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 }
26+
%3 = type { i16, i8, i8 }
27+
%4 = type { i16, i8, i8 }
28+
%5 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
29+
%6 = type { i8, i8 }
30+
%7 = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 }
31+
32+
@global = external dso_local local_unnamed_addr global %0, align 4
33+
34+
define dso_local void @zot() {
35+
bb:
36+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0), align 4
37+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 1), align 4
38+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0), align 4
39+
store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 1), align 4
40+
ret void
41+
}
42+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s
2+
3+
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
4+
target triple = "thumbv6m-none--musleabi"
5+
6+
; Check that for the same offset from the base constant, different types are materialized separately.
7+
; CHECK: %const = bitcast %5** getelementptr inbounds (%0, %0* @global, i32 0, i32 2, i32 0) to %5**
8+
; CHECK: %tmp = load %5*, %5** %const, align 4
9+
; CHECK: %base_bitcast = bitcast %5** %const to i8*
10+
; CHECK: %mat_gep = getelementptr i8, i8* %base_bitcast, i32 0
11+
; CHECK: %mat_bitcast = bitcast i8* %mat_gep to %4*
12+
; CHECK: tail call void undef(%5* nonnull %tmp, %4* %mat_bitcast)
13+
14+
%0 = type { [16 x %1], %2, %4, [16 x %5], %6, %7, i32, [4 x i32], [8 x %3], i8, i8, i8, i8, i8, i8, i8, %8, %11, %11*, i32, i16, i8, i8, i8, i8, i8, i8, [15 x i16], i8, i8, [23 x %12], i8, i8*, i8, %13, i8, i8 }
15+
%1 = type { i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 }
16+
%2 = type { %3*, i16, i16, i16 }
17+
%3 = type { [4 x i32] }
18+
%4 = type { %5*, %5*, i8 }
19+
%5 = type { [4 x i32], i8*, i8, i8 }
20+
%6 = type { i8, [4 x i32] }
21+
%7 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
22+
%8 = type { [16 x %9], %9*, %9*, %9*, %9*, %11, %11, %11, i8, i8, i8, i8 }
23+
%9 = type { %1, %11, %11, %9*, %9*, %10, i8, i8, i8, i8 }
24+
%10 = type { i32, i16 }
25+
%11 = type { %11*, %11* }
26+
%12 = type { i8, i16, i32 }
27+
%13 = type { i32, i32, i8 }
28+
29+
@global = external dso_local global %0, align 4
30+
31+
; Function Attrs: nounwind optsize ssp
32+
define dso_local void @zot() {
33+
bb:
34+
br i1 undef, label %bb2, label %bb1
35+
36+
bb1: ; preds = %bb
37+
%tmp = load %5*, %5** getelementptr inbounds (%0, %0* @global, i32 0, i32 2, i32 0), align 4
38+
tail call void undef(%5* nonnull %tmp, %4* getelementptr inbounds (%0, %0* @global, i32 0, i32 2))
39+
unreachable
40+
41+
bb2: ; preds = %bb
42+
ret void
43+
}
44+

0 commit comments

Comments
 (0)
Please sign in to comment.