diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -442,7 +442,9 @@ // // SeparateConstOffsetFromGEP - Split GEPs for better CSE // -FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false); +FunctionPass * +createSeparateConstOffsetFromGEPPass(bool LowerGEP = false, + bool CheckProfitability = false); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h --- a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h +++ b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h @@ -16,9 +16,12 @@ class SeparateConstOffsetFromGEPPass : public PassInfoMixin<SeparateConstOffsetFromGEPPass> { bool LowerGEP; + bool CheckProfitability; public: - SeparateConstOffsetFromGEPPass(bool LowerGEP = false) : LowerGEP(LowerGEP) {} + SeparateConstOffsetFromGEPPass(bool LowerGEP = false, + bool CheckProfitability = false) + : LowerGEP(LowerGEP), CheckProfitability(CheckProfitability) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &); }; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -620,6 +620,11 @@ return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics"); } +Expected<bool> parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) { + return parseSinglePassOption(Params, "check-profit", + "SeparateConstOffsetFromGEP"); +} + Expected<AddressSanitizerOptions> parseASanPassOptions(StringRef Params) { AddressSanitizerOptions Result; while (!Params.empty()) { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -357,7 +357,6 @@ FUNCTION_PASS("reg2mem", RegToMemPass()) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) -FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) @@ -441,6 +440,13 @@ "no-sink-common-insts;sink-common-insts;" "bonus-inst-threshold=N" ) +FUNCTION_PASS_WITH_PARAMS("separate-const-offset-from-gep", + "SeparateConstOffsetFromGEPPass", + [](bool CheckProfitability) { + return SeparateConstOffsetFromGEPPass(false, CheckProfitability); + }, + parseSeparateConstOffsetFromGEPPassOptions, + "check-profit") FUNCTION_PASS_WITH_PARAMS("loop-vectorize", "LoopVectorizePass", [](LoopVectorizeOptions Opts) { diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -187,17 +187,21 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <cassert> #include <cstdint> +#include #include <string> using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "separate-const-offset-from-gep" + static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), @@ -242,7 +246,8 @@ /// it. It returns the numeric value of the extracted constant offset (0 if /// failed). The meaning of the arguments are the same as Extract. static int64_t Find(Value *Idx, GetElementPtrInst *GEP, - const DominatorTree *DT); + const DominatorTree *DT, Value *&NonConstantBaseValue, + bool CheckProfitability = false); private: ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT) @@ -254,20 +259,24 @@ /// successful, returns C and update UserChain as a def-use chain from C to V; /// otherwise, UserChain is empty. /// - /// \p V The given expression - /// \p SignExtended Whether V will be sign-extended in the computation of the - /// GEP index - /// \p ZeroExtended Whether V will be zero-extended in the computation of the - /// GEP index - /// \p NonNegative Whether V is guaranteed to be non-negative. For example, - /// an index of an inbounds GEP is guaranteed to be - /// non-negative. Levaraging this, we can better split - /// inbounds GEPs. - APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative); + /// \p V The given expression + /// \p SignExtended Whether V will be sign-extended in the computation + /// of the GEP index + /// \p ZeroExtended Whether V will be zero-extended in the computation + /// of the GEP index + /// \p NonNegative Whether V is guaranteed to be non-negative. For + /// example, an index of an inbounds GEP is guaranteed + /// to be non-negative. Levaraging this, we can better + /// split inbounds GEPs. + /// \p NonConstantBaseValue The second non-constant operand if V is binary + /// operator. + APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative, + Value *&NonConstantBaseValue, bool CheckProfitability = false); /// A helper function to look into both operands of a binary operator. APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, - bool ZeroExtended); + bool ZeroExtended, Value *&NonConstantBaseValue, + bool CheckProfitability = false); /// After finding the constant offset C from the GEP index I, we build a new /// index I' s.t. I' + C = I. This function builds and returns the new @@ -340,6 +349,37 @@ const DominatorTree *DT; }; +/// GEPBaseInfo - structure contains information about possible common base for +/// GEP instructions. +struct GEPBaseInfo { + /// Pointer used in GEP instruction. + const Value *GEPPointer; + /// Indexes that precede index that can be optimized. + SmallVector PreviousIndices; + /// Non constant value that will be used in new base GEP. + const Value *NonConstantBaseValue; + + GEPBaseInfo(const Value *GEPPointer, + SmallVector PreviousIndices, + const Value *NonConstantBaseValue) + : GEPPointer(GEPPointer), PreviousIndices(PreviousIndices), + NonConstantBaseValue(NonConstantBaseValue) {} +}; + +/// GEPInfo - structure contains basic information about GEP instruction +/// needed for their modification. +struct GEPInfo { + GetElementPtrInst *GEPInstruction; + int64_t AccumulativeByteOffset; + SmallVector ConstantIndices; + + GEPInfo(GetElementPtrInst *GEPInstruction, int64_t AccumulativeByteOffset, + SmallVector &&Indices) + : GEPInstruction(GEPInstruction), + AccumulativeByteOffset(AccumulativeByteOffset), + ConstantIndices(Indices) {} +}; + /// A pass that tries to split every GEP in the function into a variadic /// base and a constant offset. 
It is a FunctionPass because searching for the /// constant offset may inspect other basic blocks. @@ -347,8 +387,10 @@ public: static char ID; - SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false) - : FunctionPass(ID), LowerGEP(LowerGEP) { + SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false, + bool CheckProfitability = false) + : FunctionPass(ID), LowerGEP(LowerGEP), + CheckProfitability(CheckProfitability) { initializeSeparateConstOffsetFromGEPLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -366,6 +408,7 @@ private: bool LowerGEP; + bool CheckProfitability; }; /// A pass that tries to split every GEP in the function into a variadic @@ -376,15 +419,21 @@ SeparateConstOffsetFromGEP( DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, TargetLibraryInfo *TLI, - function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP) - : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {} + function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP, + bool CheckProfitability) + : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP), + CheckProfitability(CheckProfitability) {} bool run(Function &F); private: /// Tries to split the given GEP into a variadic base and a constant offset, /// and returns true if the splitting succeeds. - bool splitGEP(GetElementPtrInst *GEP); + bool splitGEP(GetElementPtrInst *GEP, int64_t AccumulativeByteOffset); + + /// Canonicalize the GEP if needed and collect the information used to decide + /// whether modifying this GEP is useful. + bool preprocessGEP(GetElementPtrInst *GEP); /// Lower a GEP with multiple indices into multiple GEPs with a single index. /// Function splitGEP already split the original GEP into a variadic part and @@ -408,10 +457,8 @@ /// Finds the constant offset within each index and accumulates them. If /// LowerGEP is true, it finds in indices of both sequential and structure - /// types, otherwise it only finds in sequential indices. The output - /// NeedsExtraction indicates whether we successfully find a non-zero constant - /// offset. - int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction); + /// types, otherwise it only finds in sequential indices. + void accumulateByteOffset(GetElementPtrInst *GEP); /// Canonicalize array indices to pointer-size integers. This helps to /// simplify the logic of splitting a GEP. For example, if a + b is a @@ -473,12 +520,38 @@ /// multiple GEPs with a single index. bool LowerGEP; + /// Whether to check the possible profit of the optimization (reducing + /// register pressure) instead of modifying all possible GEPs.
+ bool CheckProfitability; + DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds; DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs; + + /// GEP instructions chosen for transformation + DenseMap<GEPBaseInfo, SmallVector<GEPInfo>> InstructionsToTransform; }; } // end anonymous namespace +template <> struct llvm::DenseMapInfo<GEPBaseInfo> { + static inline GEPBaseInfo getEmptyKey() { + return GEPBaseInfo(nullptr, SmallVector<const Value *>(), nullptr); + } + static inline GEPBaseInfo getTombstoneKey() { + return GEPBaseInfo((Value *)(-1), SmallVector<const Value *>(), + (Value *)(-1)); + } + static unsigned getHashValue(const GEPBaseInfo &Val) { + return llvm::hash_combine(Val.GEPPointer, Val.PreviousIndices.size(), + Val.NonConstantBaseValue); + } + static bool isEqual(const GEPBaseInfo &LHS, const GEPBaseInfo &RHS) { + return LHS.GEPPointer == RHS.GEPPointer && + LHS.NonConstantBaseValue == RHS.NonConstantBaseValue && + LHS.PreviousIndices == RHS.PreviousIndices; + } +}; + char SeparateConstOffsetFromGEPLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN( @@ -495,8 +568,10 @@ "Split GEPs to a variadic base and a constant offset for better CSE", false, false) -FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) { - return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP); +FunctionPass * +llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP, + bool CheckProfitability) { + return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP, CheckProfitability); } bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, @@ -566,27 +641,47 @@ APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO, bool SignExtended, - bool ZeroExtended) { + bool ZeroExtended, + Value *&NonConstantBaseValue, + bool CheckProfitability) { // Save off the current height of the chain, in case we need to restore it. size_t ChainLength = UserChain.size(); // BO being non-negative does not shed light on whether its operands are // non-negative. Clear the NonNegative flag here. - APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended, - /* NonNegative */ false); + APInt ConstantOffset = + find(BO->getOperand(0), SignExtended, ZeroExtended, + /* NonNegative */ false, NonConstantBaseValue, CheckProfitability); + + // Only sub and add instructions don't require adding extra instructions. + if (CheckProfitability && (BO->getOpcode() != Instruction::Sub && + BO->getOpcode() != Instruction::Add)) { + LLVM_DEBUG(dbgs() << "Reset ConstantOffset\n"); + NonConstantBaseValue = nullptr; + ConstantOffset = 0; + UserChain.resize(ChainLength); + return ConstantOffset; + } + // If we found a constant offset in the left operand, stop and return that. // This shortcut might cause us to miss opportunities of combining the // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. // However, such cases are probably already handled by -instcombine, // given this pass runs after the standard optimizations. - if (ConstantOffset != 0) return ConstantOffset; + if (ConstantOffset != 0) { + if (!isa<ConstantInt>(BO->getOperand(1))) { + NonConstantBaseValue = BO->getOperand(1); + } + return ConstantOffset; + } // Reset the chain back to where it was when we started exploring this node, // since visiting the LHS didn't pan out. UserChain.resize(ChainLength); - ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended, - /* NonNegative */ false); + ConstantOffset = + find(BO->getOperand(1), SignExtended, ZeroExtended, + /* NonNegative */ false, NonConstantBaseValue, CheckProfitability); // If U is a sub operator, negate the constant offset found in the right // operand.
if (BO->getOpcode() == Instruction::Sub) @@ -596,11 +691,17 @@ if (ConstantOffset == 0) UserChain.resize(ChainLength); + if (!isa<ConstantInt>(BO->getOperand(0))) { + NonConstantBaseValue = BO->getOperand(0); + } + return ConstantOffset; } APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended, - bool ZeroExtended, bool NonNegative) { + bool ZeroExtended, bool NonNegative, + Value *&NonConstantBaseValue, + bool CheckProfitability) { // TODO(jingyue): We could trace into integer/pointer casts, such as // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only // integers because it gives good enough results for our benchmarks. @@ -617,22 +718,27 @@ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) { // Trace into subexpressions for more hoisting opportunities. if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) - ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended); + ConstantOffset = + findInEitherOperand(BO, SignExtended, ZeroExtended, + NonConstantBaseValue, CheckProfitability); } else if (isa<TruncInst>(V)) { - ConstantOffset = - find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative) - .trunc(BitWidth); + ConstantOffset = find(U->getOperand(0), SignExtended, ZeroExtended, + NonNegative, NonConstantBaseValue, CheckProfitability) + .trunc(BitWidth); } else if (isa<SExtInst>(V)) { - ConstantOffset = find(U->getOperand(0), /* SignExtended */ true, - ZeroExtended, NonNegative).sext(BitWidth); + ConstantOffset = + find(U->getOperand(0), /* SignExtended */ true, ZeroExtended, + NonNegative, NonConstantBaseValue, CheckProfitability) + .sext(BitWidth); } else if (isa<ZExtInst>(V)) { // As an optimization, we can clear the SignExtended flag because // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll. // // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0. - ConstantOffset = - find(U->getOperand(0), /* SignExtended */ false, - /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth); + ConstantOffset = find(U->getOperand(0), /* SignExtended */ false, + /* ZeroExtended */ true, /* NonNegative */ false, + NonConstantBaseValue, CheckProfitability) + .zext(BitWidth); } // If we found a non-zero constant offset, add it to the path for @@ -768,10 +874,11 @@ User *&UserChainTail, const DominatorTree *DT) { ConstantOffsetExtractor Extractor(GEP, DT); + Value *NonConstantBaseValue = nullptr; // Find a non-zero constant offset first. APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, - GEP->isInBounds()); + GEP->isInBounds(), NonConstantBaseValue); if (ConstantOffset == 0) { UserChainTail = nullptr; return nullptr; } @@ -783,11 +890,13 @@ int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP, - const DominatorTree *DT) { + const DominatorTree *DT, + Value *&NonConstantBaseValue, + bool CheckProfitability) { // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
return ConstantOffsetExtractor(GEP, DT) .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, - GEP->isInBounds()) + GEP->isInBounds(), NonConstantBaseValue, CheckProfitability) .getSExtValue(); } @@ -809,37 +918,78 @@ return Changed; } -int64_t -SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, - bool &NeedsExtraction) { - NeedsExtraction = false; +void SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP) { int64_t AccumulativeByteOffset = 0; gep_type_iterator GTI = gep_type_begin(*GEP); + SmallVector<Value *> ConstantIndices; + SmallVector<GEPBaseInfo> PossibleBases; + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + Value *NonConstantBaseValue = nullptr; if (GTI.isSequential()) { // Tries to extract a constant offset from this GEP index. - int64_t ConstantOffset = - ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT); + int64_t ConstantOffset = ConstantOffsetExtractor::Find( + GEP->getOperand(I), GEP, DT, NonConstantBaseValue, + CheckProfitability); if (ConstantOffset != 0) { - NeedsExtraction = true; + if (CheckProfitability || PossibleBases.empty()) { + PossibleBases.emplace_back( + GEP->getPointerOperand(), + SmallVector<const Value *>(GEP->idx_begin(), + GEP->idx_begin() + I - 1), + NonConstantBaseValue); + } + // A GEP may have multiple indices. We accumulate the extracted // constant offset to a byte offset, and later offset the remainder of // the original GEP with this byte offset. AccumulativeByteOffset += ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType()); + ConstantIndices.push_back(GEP->getOperand(I)); } } else if (LowerGEP) { StructType *StTy = GTI.getStructType(); uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue(); // Skip field 0 as the offset is always 0. if (Field != 0) { - NeedsExtraction = true; + if (CheckProfitability || PossibleBases.empty()) { + PossibleBases.emplace_back(GEP->getPointerOperand(), + SmallVector<const Value *>(), + NonConstantBaseValue); + } AccumulativeByteOffset += DL->getStructLayout(StTy)->getElementOffset(Field); } } } - return AccumulativeByteOffset; + TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); + + // If LowerGEP is disabled, before really splitting the GEP, check whether the + // backend supports the addressing mode we are about to produce. If no, this + // splitting probably won't be beneficial. + // If LowerGEP is enabled, even the extracted constant offset can not match + // the addressing mode, we can still do optimizations to other lowered parts + // of variable indices. Therefore, we don't check for addressing modes in that + // case. + if (!LowerGEP) { + unsigned AddrSpace = GEP->getPointerAddressSpace(); + if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), + /*BaseGV=*/nullptr, AccumulativeByteOffset, + /*HasBaseReg=*/true, /*Scale=*/0, + AddrSpace)) { + LLVM_DEBUG( + dbgs() + << "Don't optimize.
The backend doesn't support the addressing mode\n"); + return; + } + } + for (const GEPBaseInfo &Base : PossibleBases) { + if (InstructionsToTransform.find(Base) == InstructionsToTransform.end()) { + InstructionsToTransform[Base] = SmallVector<GEPInfo>(); + } + InstructionsToTransform[Base].emplace_back(GEP, AccumulativeByteOffset, + std::move(ConstantIndices)); + } } void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( @@ -913,9 +1063,8 @@ Variadic->eraseFromParent(); } -void -SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, - int64_t AccumulativeByteOffset) { +void SeparateConstOffsetFromGEP::lowerToArithmetics( + GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); @@ -959,7 +1108,7 @@ Variadic->eraseFromParent(); } -bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { +bool SeparateConstOffsetFromGEP::preprocessGEP(GetElementPtrInst *GEP) { // Skip vector GEPs. if (GEP->getType()->isVectorTy()) return false; @@ -971,31 +1120,15 @@ bool Changed = canonicalizeArrayIndicesToPointerSize(GEP); - bool NeedsExtraction; - int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); + accumulateByteOffset(GEP); - if (!NeedsExtraction) - return Changed; + return Changed; +} +bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP, + int64_t AccumulativeByteOffset) { TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); - // If LowerGEP is disabled, before really splitting the GEP, check whether the - // backend supports the addressing mode we are about to produce. If no, this - // splitting probably won't be beneficial. - // If LowerGEP is enabled, even the extracted constant offset can not match - // the addressing mode, we can still do optimizations to other lowered parts - // of variable indices. Therefore, we don't check for addressing modes in that - // case. - if (!LowerGEP) { - unsigned AddrSpace = GEP->getPointerAddressSpace(); - if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), - /*BaseGV=*/nullptr, AccumulativeByteOffset, - /*HasBaseReg=*/true, /*Scale=*/0, - AddrSpace)) { - return Changed; - } - } - // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. // Notice that we don't remove struct field indices here. If LowerGEP is @@ -1152,7 +1285,8 @@ auto GetTTI = [this](Function &F) -> TargetTransformInfo & { return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP, + CheckProfitability); return Impl.run(F); } @@ -1162,15 +1296,85 @@ DL = &F.getParent()->getDataLayout(); bool Changed = false; + + auto OnlyUsedInGEP = [](const GEPInfo &GEPInfo) { + bool OnlyUsedInGEP = GEPInfo.ConstantIndices.empty(); + for (const Value *Index : GEPInfo.ConstantIndices) { + uint64_t NumUses = Index->getNumUses(); + // In case of a cast instruction, check usages of both the result and the + // original value.
+ if (isa<SExtInst>(Index) || isa<ZExtInst>(Index) || + isa<TruncInst>(Index)) { + NumUses += cast<Instruction>(Index)->getOperand(0)->getNumUses() - 1; + } + OnlyUsedInGEP |= NumUses == 1; + } + return OnlyUsedInGEP; + }; + + LLVM_DEBUG(dbgs() << "========= Function " << F.getName() << " =========\n"); for (BasicBlock &B : F) { + InstructionsToTransform.clear(); if (!DT->isReachableFromEntry(&B)) continue; for (Instruction &I : llvm::make_early_inc_range(B)) if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) - Changed |= splitGEP(GEP); - // No need to split GEP ConstantExprs because all its indices are constant - // already. + Changed |= preprocessGEP(GEP); + + if (!CheckProfitability) { + for (const auto &GEPInfoPair : InstructionsToTransform) { + for (const auto &GEPInfo : GEPInfoPair.second) { + Changed |= + splitGEP(GEPInfo.GEPInstruction, GEPInfo.AccumulativeByteOffset); + // No need to split GEP ConstantExprs because all its indices are + // constant already. + } + } + } else { + // Since one instruction can be optimized using different bases, choose + // the best base option based on the possible effect on register + // pressure. Sort all found bases in decreasing order of possible effect. + SmallVector<std::pair<SmallVector<GEPInfo>, unsigned>> + SortedInstructionsList; + for (const auto &GEPInfoPair : InstructionsToTransform) { + unsigned DeadValuesNumber = count_if(GEPInfoPair.second, OnlyUsedInGEP); + if (DeadValuesNumber > 0) { + SortedInstructionsList.emplace_back(GEPInfoPair.second, + DeadValuesNumber); + } + } + sort(SortedInstructionsList, [](auto &LHS, auto &RHS) { + return LHS.second != RHS.second ? LHS.second > RHS.second + : LHS.first.size() > RHS.first.size(); + }); + + // Optimize all chosen GEPs + for (unsigned I = 0; I < SortedInstructionsList.size(); I++) { + auto DetailedInfoList = SortedInstructionsList[I].first; + if (DetailedInfoList.size() > 1 && + any_of(DetailedInfoList, OnlyUsedInGEP)) { + for (const auto &GEPInfo : DetailedInfoList) { + LLVM_DEBUG(dbgs() << "Try to split GEP " << *GEPInfo.GEPInstruction + << "\n"); + bool CurrentChanged = splitGEP(GEPInfo.GEPInstruction, + GEPInfo.AccumulativeByteOffset); + Changed |= CurrentChanged; + // If a GEP is already optimized, remove it from the lists connected + // with other bases.
+ for (unsigned J = I + 1; + J < SortedInstructionsList.size() && CurrentChanged; J++) { + auto RemoveIt = remove_if(SortedInstructionsList[J].first, + [&GEPInfo](const struct GEPInfo &Info) { + return Info.GEPInstruction == + GEPInfo.GEPInstruction; + }); + SortedInstructionsList[J].first.erase( + RemoveIt, SortedInstructionsList[J].first.end()); + } + } + } + } + } } Changed |= reuniteExts(F); @@ -1378,7 +1582,8 @@ auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & { return AM.getResult<TargetIRAnalysis>(F); }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP, + CheckProfitability); if (!Impl.run(F)) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=riscv64-unknown-elf -passes='separate-const-offset-from-gep,early-cse' \ +; RUN: opt < %s -mtriple=riscv64-unknown-elf -passes='separate-const-offset-from-gep<check-profit>,early-cse' \ ; RUN: -S | FileCheck %s ; Several tests for -separate-const-offset-from-gep. The transformation ; heavily relies on TargetTransformInfo, so we put these tests under ; target-specific folders. +; Positive test ; Simple case when GEPs should be optimized. define i64 @test1(i64* %array, i64 %i, i64 %j) { ; CHECK-LABEL: @test1( @@ -33,6 +34,7 @@ ret i64 undef } +; Positive test ; Optimize GEPs when there sext instructions are needed to cast index value to expected type. define i32 @test2(i32* %array, i32 %i, i32 %j) { ; CHECK-LABEL: @test2( @@ -64,22 +66,24 @@ ret i32 undef } +; Negative test ; No need to modify because all values are also used in other expressions. ; Modification doesn't decrease register pressure.
define i32 @test3(i32* %array, i32 %i) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[I]], 6 -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6 -; CHECK-NEXT: store i32 [[ADD3]], i32* [[GEP54]], align 4 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[ADD3]], i32* [[GEP5]], align 4 ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[I]], 35 -; CHECK-NEXT: [[GEP86:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 35 -; CHECK-NEXT: store i32 [[ADD6]], i32* [[GEP86]], align 4 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[ADD6]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: @@ -98,6 +102,7 @@ ret i32 undef } +; Positive test ; Optimized GEPs for multidimensional array with same base define i32 @test4([50 x i32]* %array2, i32 %i) { ; CHECK-LABEL: @test4( @@ -163,6 +168,7 @@ ret i32 undef } +; Negative test ; No need to optimize GEPs, because there is critical amount with non-constant offsets. define i64 @test6(i64* %array, i64 %i, i64 %j) { ; CHECK-LABEL: @test6( @@ -170,10 +176,11 @@ ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[I:%.*]], 5 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, i64* [[ARRAY:%.*]], i64 [[J:%.*]] ; CHECK-NEXT: store i64 [[ADD]], i64* [[GEP]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, i64* [[ARRAY]], i64 [[I]] -; CHECK-NEXT: [[GEP52:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 6 -; CHECK-NEXT: store i64 [[I]], i64* [[GEP52]], align 4 -; CHECK-NEXT: store i64 [[I]], i64* [[TMP0]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i64 [[I]], 6 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i64, i64* [[ARRAY]], i64 [[ADD3]] +; CHECK-NEXT: store i64 [[I]], i64* [[GEP5]], align 4 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i64, i64* [[ARRAY]], i64 [[I]] +; CHECK-NEXT: store i64 [[I]], i64* [[GEP8]], align 4 ; CHECK-NEXT: ret i64 undef ; entry: @@ -189,23 +196,23 @@ ret i64 undef } +; Negative test ; No need to optimize GEPs, because the base variable is different. 
define i32 @test7(i32* %array, i32 %i, i32 %j, i32 %k) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[K:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[ARRAY]], i64 [[TMP2]] -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 6 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP54]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[J:%.*]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[ARRAY]], i64 [[TMP4]] -; CHECK-NEXT: [[GEP86:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 35 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP86]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[K:%.*]], 6 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP5]], align 4 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[J:%.*]], 35 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: @@ -224,21 +231,23 @@ ret i32 undef } +; Negative test ; No need to optimize GEPs, because the base of GEP instructions is different. 
define i32 @test8(i32* %array, i32* %array2, i32* %array3, i32 %i) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ARRAY2:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 6 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP54]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[ARRAY3:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP86:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 35 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP86]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[I]], 6 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, i32* [[ARRAY2:%.*]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP5]], align 4 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[I]], 35 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i32, i32* [[ARRAY3:%.*]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: @@ -257,20 +266,24 @@ ret i32 undef } +; Negative test ; No need to optimize GEPs of multidimensional array, because the base of GEP instructions is different. define i32 @test9([50 x i32]* %array, i32 %i) { ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARRAY:%.*]], i64 0, i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARRAY]], i64 [[TMP0]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 6 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP54]], align 4 -; CHECK-NEXT: [[GEP87:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 335 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP87]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARRAY:%.*]], i64 0, i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[I]], 6 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[INT:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARRAY]], i64 [[INT]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP5]], align 4 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[I]], 35 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARRAY]], i64 [[SEXT4]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: