Index: include/llvm/Transforms/Scalar/ConstantHoisting.h =================================================================== --- include/llvm/Transforms/Scalar/ConstantHoisting.h +++ include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -38,6 +38,7 @@ #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" @@ -50,8 +51,10 @@ class BlockFrequencyInfo; class Constant; class ConstantInt; +class ConstantExpr; class DominatorTree; class Function; +class GlobalVariable; class Instruction; class TargetTransformInfo; @@ -74,10 +77,15 @@ /// Keeps track of a constant candidate and its uses. struct ConstantCandidate { ConstantUseListType Uses; + // If the candidate is a ConstantExpr (currently only constant GEP expressions + // whose base pointers are GlobalVariables are supported), ConstInt records + // its offset from the base GV, ConstExpr tracks the candidate GEP expr. ConstantInt *ConstInt; + ConstantExpr *ConstExpr; unsigned CumulativeCost = 0; - ConstantCandidate(ConstantInt *ConstInt) : ConstInt(ConstInt) {} + ConstantCandidate(ConstantInt *ConstInt, ConstantExpr *ConstExpr=nullptr) : + ConstInt(ConstInt), ConstExpr(ConstExpr) {} /// Add the user to the use list and update the cost. void addUser(Instruction *Inst, unsigned Idx, unsigned Cost) { @@ -91,16 +99,21 @@ struct RebasedConstantInfo { ConstantUseListType Uses; Constant *Offset; + Type *Ty; - RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset) - : Uses(std::move(Uses)), Offset(Offset) {} + RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset, + Type *Ty=nullptr) : Uses(std::move(Uses)), Offset(Offset), Ty(Ty) {} }; using RebasedConstantListType = SmallVector; /// A base constant and all its rebased constants. 
struct ConstantInfo { - ConstantInt *BaseConstant; + // If the candidate is a ConstantExpr (currently only constant GEP expressions + // whose base pointers are GlobalVariables are supported), BaseInt records + // its offset from the base GV, BaseExpr tracks the candidate GEP expr. + ConstantInt *BaseInt; + ConstantExpr *BaseExpr; RebasedConstantListType RebasedConstants; }; @@ -115,29 +128,43 @@ BlockFrequencyInfo *BFI, BasicBlock &Entry); void releaseMemory() { - ConstantVec.clear(); ClonedCastMap.clear(); - ConstCandVec.clear(); + ConstIntCandVec.clear(); + for (auto MapEntry : ConstGEPCandMap) + MapEntry.second.clear(); + ConstGEPCandMap.clear(); + ConstIntInfoVec.clear(); + for (auto MapEntry : ConstGEPInfoMap) + MapEntry.second.clear(); + ConstGEPInfoMap.clear(); } private: - using ConstCandMapType = DenseMap; - using ConstCandVecType = std::vector; + using ConstPtrUnionType = PointerUnion; + using ConstCandMapType = DenseMap; const TargetTransformInfo *TTI; DominatorTree *DT; BlockFrequencyInfo *BFI; + LLVMContext *Ctx; + const DataLayout *DL; BasicBlock *Entry; /// Keeps track of constant candidates found in the function. - ConstCandVecType ConstCandVec; + using ConstCandVecType = std::vector; + using GVCandVecMapType = DenseMap; + ConstCandVecType ConstIntCandVec; + GVCandVecMapType ConstGEPCandMap; + + /// These are the final constants we decided to hoist. + using ConstInfoVecType = SmallVector; + using GVInfoVecMapType = DenseMap; + ConstInfoVecType ConstIntInfoVec; + GVInfoVecMapType ConstGEPInfoMap; /// Keep track of cast instructions we already cloned. SmallDenseMap ClonedCastMap; - /// These are the final constants we decided to hoist. 
- SmallVector ConstantVec; - Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const; SmallPtrSet findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const; @@ -145,19 +172,27 @@ Instruction *Inst, unsigned Idx, ConstantInt *ConstInt); void collectConstantCandidates(ConstCandMapType &ConstCandMap, + Instruction *Inst, unsigned Idx, + ConstantExpr *ConstExpr); + void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx); void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst); void collectConstantCandidates(Function &Fn); void findAndMakeBaseConstant(ConstCandVecType::iterator S, - ConstCandVecType::iterator E); + ConstCandVecType::iterator E, + SmallVectorImpl &ConstInfoVec); unsigned maximizeConstantsInRange(ConstCandVecType::iterator S, ConstCandVecType::iterator E, ConstCandVecType::iterator &MaxCostItr); - void findBaseConstants(); - void emitBaseConstants(Instruction *Base, Constant *Offset, + // If BaseGV is nullptr, find base among Constant Integer candidates; + // otherwise find base among constant GEPs sharing BaseGV as base pointer. + void findBaseConstants(GlobalVariable *BaseGV); + void emitBaseConstants(Instruction *Base, Constant *Offset, Type *Ty, const consthoist::ConstantUser &ConstUser); - bool emitBaseConstants(); + // If BaseGV is nullptr, emit Constant Integer base; otherwise emit + // constant GEP base. 
+ bool emitBaseConstants(GlobalVariable *BaseGV); void deleteDeadCastInst() const; bool optimizeConstants(Function &Fn); }; Index: lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- lib/Transforms/Scalar/ConstantHoisting.cpp +++ lib/Transforms/Scalar/ConstantHoisting.cpp @@ -82,6 +82,10 @@ "chance to execute const materialization more frequently than " "without hoisting.")); +static cl::opt ConstHoistGEP( + "consthoist-gep", cl::init(false), cl::Hidden, + cl::desc("Try hoisting constant gep expressions")); + namespace { /// The constant hoisting pass. @@ -340,7 +344,7 @@ /// /// The operand at index Idx is not necessarily the constant integer itself. It /// could also be a cast instruction or a constant expression that uses the -// constant integer. +/// constant integer. void ConstantHoistingPass::collectConstantCandidates( ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, ConstantInt *ConstInt) { @@ -358,12 +362,13 @@ if (Cost > TargetTransformInfo::TCC_Basic) { ConstCandMapType::iterator Itr; bool Inserted; - std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(ConstInt, 0)); + ConstPtrUnionType Cand = ConstInt; + std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0)); if (Inserted) { - ConstCandVec.push_back(ConstantCandidate(ConstInt)); - Itr->second = ConstCandVec.size() - 1; + ConstIntCandVec.push_back(ConstantCandidate(ConstInt)); + Itr->second = ConstIntCandVec.size() - 1; } - ConstCandVec[Itr->second].addUser(Inst, Idx, Cost); + ConstIntCandVec[Itr->second].addUser(Inst, Idx, Cost); LLVM_DEBUG(if (isa(Inst->getOperand(Idx))) dbgs() << "Collect constant " << *ConstInt << " from " << *Inst << " with cost " << Cost << '\n'; @@ -374,6 +379,48 @@ } } +/// Record constant GEP expression for instruction Inst at operand index Idx. 
+void ConstantHoistingPass::collectConstantCandidates( + ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, + ConstantExpr *ConstExpr) { + // TODO: Handle vector GEPs + if (ConstExpr->getType()->isVectorTy()) + return; + + GlobalVariable *BaseGV = dyn_cast(ConstExpr->getOperand(0)); + if (!BaseGV) + return; + + // Get offset from the base GV. + PointerType *GVPtrTy = dyn_cast(BaseGV->getType()); + IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace()); + APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true); + auto *GEPO = cast(ConstExpr); + if (!GEPO->accumulateConstantOffset(*DL, Offset)) + return; + + if (!Offset.isIntN(32)) + return; + + // A constant GEP expression that has a GlobalVariable as base pointer is + // usually lowered to a load from constant pool. Such operation is unlikely + // to be cheaper than computing it as Base + Offset, which can be lowered to + // an ADD instruction or folded into Load/Store instruction. + int Cost = TTI->getIntImmCost(Instruction::Add, 1, Offset, PtrIntTy); + ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; + ConstCandMapType::iterator Itr; + bool Inserted; + ConstPtrUnionType Cand = ConstExpr; + std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0)); + if (Inserted) { + ExprCandVec.push_back(ConstantCandidate( + ConstantInt::get(Type::getInt32Ty(*Ctx), Offset.getLimitedValue()), + ConstExpr)); + Itr->second = ExprCandVec.size() - 1; + } + ExprCandVec[Itr->second].addUser(Inst, Idx, Cost); +} + /// Check the operand for instruction Inst at index Idx. void ConstantHoistingPass::collectConstantCandidates( ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) { @@ -402,6 +449,10 @@ // Visit constant expressions that have constant integers. if (auto ConstExpr = dyn_cast(Opnd)) { + // Handle constant gep expressions. 
+ if (ConstHoistGEP && ConstExpr->isGEPWithNoNotionalOverIndexing()) + collectConstantCandidates(ConstCandMap, Inst, Idx, ConstExpr); + // Only visit constant cast expressions. if (!ConstExpr->isCast()) return; @@ -544,7 +595,8 @@ /// Find the base constant within the given range and rebase all other /// constants with respect to the base constant. void ConstantHoistingPass::findAndMakeBaseConstant( - ConstCandVecType::iterator S, ConstCandVecType::iterator E) { + ConstCandVecType::iterator S, ConstCandVecType::iterator E, + SmallVectorImpl &ConstInfoVec) { auto MaxCostItr = S; unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr); @@ -552,30 +604,60 @@ if (NumUses <= 1) return; + ConstantInt *ConstInt = MaxCostItr->ConstInt; + ConstantExpr *ConstExpr = MaxCostItr->ConstExpr; ConstantInfo ConstInfo; - ConstInfo.BaseConstant = MaxCostItr->ConstInt; - Type *Ty = ConstInfo.BaseConstant->getType(); + ConstInfo.BaseInt = ConstInt; + ConstInfo.BaseExpr = ConstExpr; + Type *Ty = ConstInt->getType(); // Rebase the constants with respect to the base constant. for (auto ConstCand = S; ConstCand != E; ++ConstCand) { - APInt Diff = ConstCand->ConstInt->getValue() - - ConstInfo.BaseConstant->getValue(); + APInt Diff = ConstCand->ConstInt->getValue() - ConstInt->getValue(); Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff); + Type *ConstTy = + ConstCand->ConstExpr ? ConstCand->ConstExpr->getType() : nullptr; ConstInfo.RebasedConstants.push_back( - RebasedConstantInfo(std::move(ConstCand->Uses), Offset)); + RebasedConstantInfo(std::move(ConstCand->Uses), Offset, ConstTy)); } - ConstantVec.push_back(std::move(ConstInfo)); + ConstInfoVec.push_back(std::move(ConstInfo)); } /// Finds and combines constant candidates that can be easily /// rematerialized with an add from a common base constant. 
-void ConstantHoistingPass::findBaseConstants() { +void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) { + // If BaseGV is nullptr, find base among candidate constant integers; + // Otherwise find base among constant GEPs that share the same BaseGV. + ConstCandVecType &ConstCandVec = BaseGV ? + ConstGEPCandMap[BaseGV] : ConstIntCandVec; + ConstInfoVecType &ConstInfoVec = BaseGV ? + ConstGEPInfoMap[BaseGV] : ConstIntInfoVec; + // Sort the constants by value and type. This invalidates the mapping! llvm::sort(ConstCandVec.begin(), ConstCandVec.end(), [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) { if (LHS.ConstInt->getType() != RHS.ConstInt->getType()) return LHS.ConstInt->getType()->getBitWidth() < RHS.ConstInt->getType()->getBitWidth(); + if (LHS.ConstExpr) { + assert (RHS.ConstExpr && "Illegal comparison between a constant integer" + "and a constant gep expression candidates."); + if (LHS.ConstExpr->getType() != RHS.ConstExpr->getType() && + LHS.ConstInt->getValue().eq(RHS.ConstInt->getValue())) + // If two GEPs sharing the same GV as base pointer have the same + // offset but are of different types, their number of indices must be + // different. For example: + // %0 = type { [16 x i8], i16, %1 } + // %1 = type { %2*, %2*, i8 } + // %2 = type { [4 x i32], i8*, i8, i8 } + // @global = external dso_local global %0, align 4 + // %2** getelementptr inbounds (%0, %0* @global, i32 0, i32 2, i32 0) + // %1* getelementptr inbounds (%0, %0* @global, i32 0, i32 2) + return LHS.ConstExpr->getNumOperands() < + RHS.ConstExpr->getNumOperands(); + } + // If two GEPs have the same number of indices, they must have different + // offsets. return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue()); }); @@ -613,12 +695,12 @@ } // We either have now a different constant type or the constant is not in // range of an add with immediate anymore. 
- findAndMakeBaseConstant(MinValItr, CC); + findAndMakeBaseConstant(MinValItr, CC, ConstInfoVec); // Start a new base constant search. MinValItr = CC; } // Finalize the last base constant search. - findAndMakeBaseConstant(MinValItr, ConstCandVec.end()); + findAndMakeBaseConstant(MinValItr, ConstCandVec.end(), ConstInfoVec); } /// Updates the operand at Idx in instruction Inst with the result of @@ -653,12 +735,28 @@ /// users. void ConstantHoistingPass::emitBaseConstants(Instruction *Base, Constant *Offset, + Type *Ty, const ConstantUser &ConstUser) { Instruction *Mat = Base; + + // The same offset can be dereferenced to different types in nested struct. + if (!Offset && Ty && Ty != Base->getType()) + Offset = ConstantInt::get(Type::getInt32Ty(*Ctx), 0); + if (Offset) { Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst, ConstUser.OpndIdx); - Mat = BinaryOperator::Create(Instruction::Add, Base, Offset, + if (Ty) { + // Constant being rebased is a ConstantExpr. + PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx, + cast(Ty)->getAddressSpace()); + Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt); + Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base, + Offset, "mat_gep", InsertionPt); + Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt); + } else + // Constant being rebased is a ConstantInt. + Mat = BinaryOperator::Create(Instruction::Add, Base, Offset, "const_mat", InsertionPt); LLVM_DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0) @@ -702,6 +800,14 @@ // Visit constant expression. if (auto ConstExpr = dyn_cast(Opnd)) { + if (ConstExpr->isGEPWithNoNotionalOverIndexing()) { + // Operand is a ConstantGEP, replace it. + updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat); + return; + } + + // Aside from constant GEPs, only constant cast expressions are collected. 
+ assert(ConstExpr->isCast() && "ConstExpr should be a cast"); Instruction *ConstExprInst = ConstExpr->getAsInstruction(); ConstExprInst->setOperand(0, Mat); ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst, @@ -725,23 +831,31 @@ /// Hoist and hide the base constant behind a bitcast and emit /// materialization code for derived constants. -bool ConstantHoistingPass::emitBaseConstants() { +bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) { bool MadeChange = false; - for (auto const &ConstInfo : ConstantVec) { - // Hoist and hide the base constant behind a bitcast. + SmallVectorImpl &ConstInfoVec = + BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec; + for (auto const &ConstInfo : ConstInfoVec) { SmallPtrSet IPSet = findConstantInsertionPoint(ConstInfo); assert(!IPSet.empty() && "IPSet is empty"); unsigned UsesNum = 0; unsigned ReBasesNum = 0; for (Instruction *IP : IPSet) { - IntegerType *Ty = ConstInfo.BaseConstant->getType(); - Instruction *Base = - new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); + Instruction *Base = nullptr; + // Hoist and hide the base constant behind a bitcast. + if (ConstInfo.BaseExpr) { + assert(BaseGV && "A base constant expression must have an base GV"); + Type *Ty = ConstInfo.BaseExpr->getType(); + Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP); + } else { + IntegerType *Ty = ConstInfo.BaseInt->getType(); + Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP); + } Base->setDebugLoc(IP->getDebugLoc()); - LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant + LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseInt << ") to BB " << IP->getParent()->getName() << '\n' << *Base << '\n'); @@ -756,11 +870,12 @@ // generate rebase for U using the Base dominating U. 
if (IPSet.size() == 1 || DT->dominates(Base->getParent(), OrigMatInsertBB)) { - emitBaseConstants(Base, RCI.Offset, U); + emitBaseConstants(Base, RCI.Offset, RCI.Ty, U); ReBasesNum++; } - Base->setDebugLoc(DILocation::getMergedLocation(Base->getDebugLoc(), U.Inst->getDebugLoc())); + Base->setDebugLoc(DILocation::getMergedLocation( + Base->getDebugLoc(), U.Inst->getDebugLoc())); } } UsesNum = Uses; @@ -779,7 +894,7 @@ // Base constant is also included in ConstInfo.RebasedConstants, so // deduct 1 from ConstInfo.RebasedConstants.size(). - NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1; + NumConstantsRebased += ConstInfo.RebasedConstants.size() - 1; MadeChange = true; } @@ -801,25 +916,29 @@ this->TTI = &TTI; this->DT = &DT; this->BFI = BFI; + this->DL = &Fn.getParent()->getDataLayout(); + this->Ctx = &Fn.getContext(); this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); - // There are no constant candidates to worry about. - if (ConstCandVec.empty()) - return false; - // Combine constants that can be easily materialized with an add from a common // base constant. - findBaseConstants(); - - // There are no constants to emit. - if (ConstantVec.empty()) - return false; + if (!ConstIntCandVec.empty()) + findBaseConstants(nullptr); + for (auto &MapEntry : ConstGEPCandMap) + if (!MapEntry.second.empty()) + findBaseConstants(MapEntry.first); // Finally hoist the base constant and emit materialization code for dependent // constants. - bool MadeChange = emitBaseConstants(); + bool MadeChange = false; + if (!ConstIntInfoVec.empty()) + MadeChange = emitBaseConstants(nullptr); + for (auto MapEntry : ConstGEPInfoMap) + if (!MapEntry.second.empty()) + MadeChange |= emitBaseConstants(MapEntry.first); + // Cleanup dead instructions. 
deleteDeadCastInst(); Index: test/CodeGen/AArch64/consthoist-gep.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/consthoist-gep.ll @@ -0,0 +1,100 @@ +; RUN: llc -mtriple=aarch64-none-unknown-linuxeabi -consthoist-gep %s -o - | FileCheck %s + +; CHECK-NOT: adrp x10, global+332 +; CHECK-NOT: add x10, x10, :lo12:global+332 +; CHECK: adrp x10, global+528 +; CHECK-NEXT: add x10, x10, :lo12:global+528 + +%struct.blam = type { %struct.bar, %struct.bar.0, %struct.wobble, %struct.wombat, i8, i16, %struct.snork.2, %struct.foo, %struct.snork.3, %struct.wobble.4, %struct.quux, [9 x i16], %struct.spam, %struct.zot } +%struct.bar = type { i8, i8, %struct.snork } +%struct.snork = type { i16, i8, i8 } +%struct.bar.0 = type { i8, i8, i16, i8, i8, %struct.barney } +%struct.barney = type { i8, i8, i8, i8 } +%struct.wobble = type { i8, i8, %struct.eggs, %struct.bar.1 } +%struct.eggs = type { i8, i8, i8 } +%struct.bar.1 = type { i8, i8, i8, i8 } +%struct.wombat = type { i8, i8, i16, i32, i32, i32, i32 } +%struct.snork.2 = type { i8, i8, i8 } +%struct.foo = type { [12 x i32], [12 x i32], [4 x i32], i8, i8, i8, i8, i8, i8, i8, i8 } +%struct.snork.3 = type { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16 } +%struct.wobble.4 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 } +%struct.quux = type { i32, %struct.foo.5, i8, i8, i8, i8, i32, %struct.snork.6, %struct.foo.7, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 } +%struct.foo.5 = type { i16, i8, i8 } +%struct.snork.6 = type { i16, i8, i8 } +%struct.foo.7 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } +%struct.spam = type { i8, i8 } +%struct.zot = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 } + +@global = external dso_local local_unnamed_addr global 
%struct.blam, align 4 + +; Function Attrs: norecurse nounwind optsize ssp +define dso_local void @blam() local_unnamed_addr #0 { +bb: + %tmp = load i8, i8* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 7, i32 9), align 2, !tbaa !3 + %tmp1 = and i8 %tmp, 1 + %tmp2 = icmp eq i8 %tmp1, 0 + br i1 %tmp2, label %bb3, label %bb19 + +bb3: ; preds = %bb + %tmp4 = load volatile i32, i32* inttoptr (i32 805874688 to i32*), align 1024, !tbaa !23 + store i32 %tmp4, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 0), align 4, !tbaa !23 + %tmp5 = load volatile i32, i32* inttoptr (i32 805874692 to i32*), align 4, !tbaa !23 + %tmp6 = and i32 %tmp5, 65535 + store i32 %tmp6, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 1), align 4, !tbaa !23 + %tmp7 = load volatile i32, i32* inttoptr (i32 805874696 to i32*), align 8, !tbaa !23 + %tmp8 = and i32 %tmp7, 522133279 + store i32 %tmp8, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 2), align 4, !tbaa !23 + %tmp9 = load volatile i32, i32* inttoptr (i32 805874700 to i32*), align 4, !tbaa !23 + %tmp10 = and i32 %tmp9, 522133279 + store i32 %tmp10, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 0, i32 3), align 4, !tbaa !23 + %tmp11 = load volatile i32, i32* inttoptr (i32 805874860 to i32*), align 4, !tbaa !23 + %tmp12 = and i32 %tmp11, 16777215 + store i32 %tmp12, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 15), align 4, !tbaa !24 + %tmp13 = load volatile i32, i32* inttoptr (i32 805874864 to i32*), align 16, !tbaa !23 + %tmp14 = and i32 %tmp13, 16777215 + store i32 %tmp14, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 16), align 4, !tbaa !25 + %tmp15 = load volatile i32, i32* inttoptr (i32 805874868 to i32*), align 4, !tbaa !23 + %tmp16 = and i32 %tmp15, 16777215 + 
store i32 %tmp16, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 17), align 4, !tbaa !26 + %tmp17 = load volatile i32, i32* inttoptr (i32 805874872 to i32*), align 8, !tbaa !23 + %tmp18 = and i32 %tmp17, 16777215 + store i32 %tmp18, i32* getelementptr inbounds (%struct.blam, %struct.blam* @global, i32 0, i32 13, i32 18), align 4, !tbaa !27 + br label %bb19 + +bb19: ; preds = %bb3, %bb + ret void +} + +attributes #0 = { norecurse nounwind optsize ssp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 1} +!2 = !{!"Snapdragon LLVM ARM Compiler 8.0.0 (based on LLVM 8.0.0)"} +!3 = !{!4, !6, i64 174} +!4 = !{!"", !5, i64 0, !10, i64 6, !12, i64 16, !14, i64 28, !6, i64 48, !9, i64 50, !13, i64 52, !16, i64 56, !17, i64 176, !18, i64 196, !19, i64 240, !6, i64 312, !21, i64 330, !22, i64 332} +!5 = !{!"", !6, i64 0, !6, i64 1, !8, i64 2} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!"", !9, i64 0, !6, i64 2, !6, i64 3} +!9 = !{!"short", !6, i64 0} +!10 = !{!"", !6, i64 0, !6, i64 1, !9, i64 2, !6, i64 4, !6, i64 5, !11, i64 6} +!11 = !{!"", !6, i64 0, !6, i64 1, !6, i64 2, !6, i64 3} +!12 = !{!"", !6, i64 0, !6, i64 1, !13, i64 2, !11, i64 5} +!13 = !{!"", !6, i64 0, !6, i64 1, !6, i64 2} +!14 = !{!"", !6, i64 0, !6, i64 1, !9, i64 2, !15, i64 4, !15, i64 8, !15, i64 12, !15, i64 16} +!15 = !{!"long", !6, i64 0} +!16 = !{!"", !6, i64 0, !6, i64 48, !6, i64 96, !6, i64 112, !6, i64 113, !6, i64 114, !6, i64 115, !6, i64 116, !6, i64 117, !6, i64 118, !6, i64 119} +!17 = 
!{!"", !9, i64 0, !6, i64 2, !6, i64 3, !6, i64 4, !6, i64 5, !6, i64 6, !6, i64 7, !6, i64 8, !6, i64 9, !6, i64 10, !6, i64 11, !6, i64 12, !6, i64 13, !6, i64 14, !6, i64 15, !9, i64 16} +!18 = !{!"", !15, i64 0, !15, i64 4, !15, i64 8, !15, i64 12, !15, i64 16, !15, i64 20, !9, i64 24, !9, i64 26, !6, i64 28, !6, i64 29, !9, i64 30, !15, i64 32, !15, i64 36, !9, i64 40, !6, i64 42, !6, i64 43} +!19 = !{!"", !15, i64 0, !8, i64 4, !6, i64 8, !6, i64 9, !6, i64 10, !6, i64 11, !15, i64 12, !8, i64 16, !20, i64 20, !6, i64 36, !9, i64 52, !9, i64 54, !6, i64 56, !6, i64 57, !6, i64 58, !6, i64 59, !15, i64 60, !15, i64 64, !15, i64 68} +!20 = !{!"", !6, i64 0, !6, i64 1, !6, i64 2, !6, i64 3, !6, i64 4, !6, i64 5, !6, i64 6, !6, i64 7, !6, i64 8, !6, i64 9, !6, i64 10, !6, i64 11, !6, i64 12, !6, i64 13, !6, i64 14, !6, i64 15} +!21 = !{!"", !6, i64 0, !6, i64 1} +!22 = !{!"", !6, i64 0, !6, i64 20, !6, i64 32, !6, i64 56, !6, i64 68, !6, i64 76, !6, i64 92, !6, i64 104, !6, i64 112, !6, i64 128, !6, i64 148, !6, i64 160, !6, i64 184, !15, i64 188, !15, i64 192, !15, i64 196, !15, i64 200, !15, i64 204, !15, i64 208} +!23 = !{!15, !15, i64 0} +!24 = !{!4, !15, i64 528} +!25 = !{!4, !15, i64 532} +!26 = !{!4, !15, i64 536} +!27 = !{!4, !15, i64 540} Index: test/Transforms/ConstantHoisting/AArch64/const-hoist-gep.ll =================================================================== --- /dev/null +++ test/Transforms/ConstantHoisting/AArch64/const-hoist-gep.ll @@ -0,0 +1,44 @@ +; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s + +target triple = "aarch64-none--musleabi" + +; Check that constant GEP expressions are rewritten to one-dimensional +; (single-index) GEPs, whose base pointer is a multi-dimensional GEP. 
+; CHECK: %const = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0) to i32* +; CHECK-NEXT: store i32 undef, i32* %const, align 4 + +; CHECK-NEXT: %[[BC1:[a-z0-9_]+]] = bitcast i32* %const to i8* +; CHECK-NEXT: %[[M1:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC1]], i32 4 +; CHECK-NEXT: %[[BC2:[a-z0-9_]+]] = bitcast i8* %[[M1]] to i32* +; CHECK-NEXT: store i32 undef, i32* %[[BC2]], align 4 + +; CHECK-NEXT: %[[BC3:[a-z0-9_]+]] = bitcast i32* %const to i8* +; CHECK-NEXT: %[[M2:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC3]], i32 160 +; CHECK-NEXT: %[[BC4:[a-z0-9_]+]] = bitcast i8* %[[M2]] to i32* +; CHECK-NEXT: store i32 undef, i32* %[[BC4]], align 4 + +; CHECK-NEXT: %[[BC5:[a-z0-9_]+]] = bitcast i32* %const to i8* +; CHECK-NEXT: %[[M3:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC5]], i32 164 +; CHECK-NEXT: %[[BC6:[a-z0-9_]+]] = bitcast i8* %[[M3]] to i32* +; CHECK-NEXT: store i32 undef, i32* %[[BC6]], align 4 + +%0 = type { %1, %2, [9 x i16], %6, %7 } +%1 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 } +%2 = type { i32, %3, i8, i8, i8, i8, i32, %4, %5, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 } +%3 = type { i16, i8, i8 } +%4 = type { i16, i8, i8 } +%5 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } +%6 = type { i8, i8 } +%7 = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 } + +@global = external dso_local local_unnamed_addr global %0, align 4 + +define dso_local void @zot() { +bb: + store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0), align 4 + store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 1), align 4 + store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0), align 4 + store i32 undef, i32* getelementptr inbounds (%0, %0* 
@global, i32 0, i32 4, i32 11, i32 1), align 4 + ret void +} + Index: test/Transforms/ConstantHoisting/ARM/const-hoist-gep.ll =================================================================== --- /dev/null +++ test/Transforms/ConstantHoisting/ARM/const-hoist-gep.ll @@ -0,0 +1,42 @@ +; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-none--musleabi" + +; Check that constant GEP expressions are rewritten to one-dimensional +; (single-index) GEPs, whose base pointer is a multi-dimensional GEP. +; CHECK-DAG: %[[C1:const[0-9]?]] = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0) to i32* +; CHECK-DAG: %[[C2:const[0-9]?]] = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0) to i32* + +; CHECK: store i32 undef, i32* %[[C2]], align 4 +; CHECK-NEXT: %[[BC1:[a-z0-9_]+]] = bitcast i32* %[[C2]] to i8* +; CHECK-NEXT: %[[M1:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC1]], i32 4 +; CHECK-NEXT: %[[BC2:[a-z0-9_]+]] = bitcast i8* %[[M1]] to i32* +; CHECK-NEXT: store i32 undef, i32* %[[BC2]], align 4 + +; CHECK-NEXT: store i32 undef, i32* %[[C1]], align 4 +; CHECK-NEXT: %[[BC3:[a-z0-9_]+]] = bitcast i32* %[[C1]] to i8* +; CHECK-NEXT: %[[M2:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC3]], i32 4 +; CHECK-NEXT: %[[BC4:[a-z0-9_]+]] = bitcast i8* %[[M2]] to i32* +; CHECK-NEXT: store i32 undef, i32* %[[BC4]], align 4 + +%0 = type { %1, %2, [9 x i16], %6, %7 } +%1 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 } +%2 = type { i32, %3, i8, i8, i8, i8, i32, %4, %5, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 } +%3 = type { i16, i8, i8 } +%4 = type { i16, i8, i8 } +%5 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } +%6 = type { i8, i8 } +%7 = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x 
i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 } + +@global = external dso_local local_unnamed_addr global %0, align 4 + +define dso_local void @zot() { +bb: + store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0), align 4 + store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 1), align 4 + store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0), align 4 + store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 1), align 4 + ret void +} + Index: test/Transforms/ConstantHoisting/ARM/same-offset-multi-types.ll =================================================================== --- /dev/null +++ test/Transforms/ConstantHoisting/ARM/same-offset-multi-types.ll @@ -0,0 +1,46 @@ +; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-none--musleabi" + +; Check that for the same offset from the base constant, different types are materialized separately. +; The GEP that has fewer indices is selected as base. 
+; CHECK: %const = bitcast %4* getelementptr inbounds (%0, %0* @global, i32 0, i32 2) to %4* +; CHECK: %base_bitcast = bitcast %4* %const to i8* +; CHECK: %mat_gep = getelementptr i8, i8* %base_bitcast, i32 0 +; CHECK: %mat_bitcast = bitcast i8* %mat_gep to %5** +; CHECK: %tmp = load %5*, %5** %mat_bitcast, align 4 +; CHECK: tail call void undef(%5* nonnull %tmp, %4* %const) + + +%0 = type { [16 x %1], %2, %4, [16 x %5], %6, %7, i32, [4 x i32], [8 x %3], i8, i8, i8, i8, i8, i8, i8, %8, %11, %11*, i32, i16, i8, i8, i8, i8, i8, i8, [15 x i16], i8, i8, [23 x %12], i8, i8*, i8, %13, i8, i8 } +%1 = type { i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 } +%2 = type { %3*, i16, i16, i16 } +%3 = type { [4 x i32] } +%4 = type { %5*, %5*, i8 } +%5 = type { [4 x i32], i8*, i8, i8 } +%6 = type { i8, [4 x i32] } +%7 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%8 = type { [16 x %9], %9*, %9*, %9*, %9*, %11, %11, %11, i8, i8, i8, i8 } +%9 = type { %1, %11, %11, %9*, %9*, %10, i8, i8, i8, i8 } +%10 = type { i32, i16 } +%11 = type { %11*, %11* } +%12 = type { i8, i16, i32 } +%13 = type { i32, i32, i8 } + +@global = external dso_local global %0, align 4 + +; Function Attrs: nounwind optsize ssp +define dso_local void @zot() { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + %tmp = load %5*, %5** getelementptr inbounds (%0, %0* @global, i32 0, i32 2, i32 0), align 4 + tail call void undef(%5* nonnull %tmp, %4* getelementptr inbounds (%0, %0* @global, i32 0, i32 2)) + unreachable + +bb2: ; preds = %bb + ret void +} +