diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -442,7 +442,9 @@
 //
 // SeparateConstOffsetFromGEP - Split GEPs for better CSE
 //
-FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false);
+FunctionPass *
+createSeparateConstOffsetFromGEPPass(bool LowerGEP = false,
+                                     bool CheckProfitability = false);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
--- a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
+++ b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
@@ -16,9 +16,12 @@
 class SeparateConstOffsetFromGEPPass
     : public PassInfoMixin<SeparateConstOffsetFromGEPPass> {
   bool LowerGEP;
+  bool CheckProfitability;
 
 public:
-  SeparateConstOffsetFromGEPPass(bool LowerGEP = false) : LowerGEP(LowerGEP) {}
+  SeparateConstOffsetFromGEPPass(bool LowerGEP = false,
+                                 bool CheckProfitability = false)
+      : LowerGEP(LowerGEP), CheckProfitability(CheckProfitability) {}
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
 };
 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -620,6 +620,11 @@
   return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics");
 }
 
+Expected<bool> parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) {
+  return parseSinglePassOption(Params, "check-profit",
+                               "SeparateConstOffsetFromGEP");
+}
+
 Expected<AddressSanitizerOptions> parseASanPassOptions(StringRef Params) {
   AddressSanitizerOptions Result;
   while (!Params.empty()) {
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -357,7 +357,6 @@
 FUNCTION_PASS("reg2mem", RegToMemPass())
 FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass())
 FUNCTION_PASS("scalarizer", ScalarizerPass())
-FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass())
 FUNCTION_PASS("sccp", SCCPPass())
 FUNCTION_PASS("sink", SinkingPass())
 FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
@@ -441,6 +440,13 @@
                           "no-sink-common-insts;sink-common-insts;"
                           "bonus-inst-threshold=N"
                          )
+FUNCTION_PASS_WITH_PARAMS("separate-const-offset-from-gep",
+                          "SeparateConstOffsetFromGEPPass",
+                          [](bool CheckProfitability) {
+                            return SeparateConstOffsetFromGEPPass(false, CheckProfitability);
+                          },
+                          parseSeparateConstOffsetFromGEPPassOptions,
+                          "check-profit")
 FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
                           "LoopVectorizePass",
                           [](LoopVectorizeOptions Opts) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 static cl::opt<bool> EnableRedundantCopyElimination(
@@ -167,7 +168,15 @@
   addPass(createAtomicExpandPass());
 
   addPass(createRISCVGatherScatterLoweringPass());
-
+  if (TM->getOptLevel() == CodeGenOpt::Aggressive) {
+    addPass(createSeparateConstOffsetFromGEPPass(false, true));
+    // Call EarlyCSE pass to find and remove subexpressions in the lowered
+    // result.
+    addPass(createEarlyCSEPass());
+    // Do loop invariant code motion in case part of the lowered result is
+    // invariant.
+    addPass(createLICMPass());
+  }
   TargetPassConfig::addIRPasses();
 }
 
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -187,17 +187,21 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
+#include <map>
 #include <string>
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+#define DEBUG_TYPE "separate-const-offset-from-gep"
+
 static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
     "disable-separate-const-offset-from-gep", cl::init(false),
     cl::desc("Do not separate the constant offset from a GEP instruction"),
@@ -242,7 +246,7 @@
   /// it. It returns the numeric value of the extracted constant offset (0 if
   /// failed). The meaning of the arguments are the same as Extract.
   static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
-                      const DominatorTree *DT);
+                      const DominatorTree *DT, Value *&NonConstantBaseValue);
 
 private:
   ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
@@ -254,20 +258,23 @@
   /// successful, returns C and update UserChain as a def-use chain from C to
   /// V; otherwise, UserChain is empty.
   ///
-  /// \p V            The given expression
-  /// \p SignExtended Whether V will be sign-extended in the computation of the
-  ///                 GEP index
-  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
-  ///                 GEP index
-  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
-  ///                 an index of an inbounds GEP is guaranteed to be
-  ///                 non-negative. Levaraging this, we can better split
-  ///                 inbounds GEPs.
-  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
+  /// \p V                    The given expression
+  /// \p SignExtended         Whether V will be sign-extended in the computation
+  ///                         of the GEP index
+  /// \p ZeroExtended         Whether V will be zero-extended in the computation
+  ///                         of the GEP index
+  /// \p NonNegative          Whether V is guaranteed to be non-negative. For
+  ///                         example, an index of an inbounds GEP is guaranteed
+  ///                         to be non-negative. Leveraging this, we can better
+  ///                         split inbounds GEPs.
+  /// \p NonConstantBaseValue The second, non-constant operand if V is a binary
+  ///                         operator.
+  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative,
+             Value *&NonConstantBaseValue);
 
   /// A helper function to look into both operands of a binary operator.
   APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
-                            bool ZeroExtended);
+                            bool ZeroExtended, Value *&NonConstantBaseValue);
 
   /// After finding the constant offset C from the GEP index I, we build a new
   /// index I' s.t. I' + C = I. This function builds and returns the new
@@ -340,6 +347,44 @@
   const DominatorTree *DT;
 };
 
+/// GEPBaseInfo - contains information about a possible common base for a group
+/// of GEP instructions.
+struct GEPBaseInfo {
+  /// Pointer operand used in the GEP instruction.
+  const Value *GEPPointer;
+  /// Indices that precede the index that can be optimized.
+  SmallVector<const Value *> PreviousIndices;
+  /// Non-constant value that will be used in the new base GEP.
+  const Value *NonConstantBaseValue;
+
+  GEPBaseInfo(const Value *GEPPointer,
+              SmallVector<const Value *> PreviousIndices,
+              const Value *NonConstantBaseValue)
+      : GEPPointer(GEPPointer), PreviousIndices(PreviousIndices),
+        NonConstantBaseValue(NonConstantBaseValue) {}
+
+  inline bool operator<(const GEPBaseInfo &rhs) const {
+    if (GEPPointer != rhs.GEPPointer)
+      return GEPPointer < rhs.GEPPointer;
+    if (NonConstantBaseValue != rhs.NonConstantBaseValue)
+      return NonConstantBaseValue < rhs.NonConstantBaseValue;
+    return PreviousIndices < rhs.PreviousIndices;
+  }
+};
+
+/// GEPInfo - contains the basic information about a GEP instruction that is
+/// needed to modify it.
+struct GEPInfo {
+  GetElementPtrInst *GEPInstruction;
+  int64_t AccumulativeByteOffset;
+  SmallVector<Value *> ConstantIndices;
+
+  GEPInfo(GetElementPtrInst *GEPInstruction, int64_t AccumulativeByteOffset,
+          SmallVector<Value *> &&Indices)
+      : GEPInstruction(GEPInstruction),
+        AccumulativeByteOffset(AccumulativeByteOffset),
+        ConstantIndices(std::move(Indices)) {}
+};
+
 /// A pass that tries to split every GEP in the function into a variadic
 /// base and a constant offset. It is a FunctionPass because searching for the
 /// constant offset may inspect other basic blocks.
@@ -347,8 +392,10 @@
 public:
   static char ID;
 
-  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
-      : FunctionPass(ID), LowerGEP(LowerGEP) {
+  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false,
+                                       bool CheckProfitability = false)
+      : FunctionPass(ID), LowerGEP(LowerGEP),
+        CheckProfitability(CheckProfitability) {
     initializeSeparateConstOffsetFromGEPLegacyPassPass(
         *PassRegistry::getPassRegistry());
   }
@@ -366,6 +413,7 @@
 
 private:
   bool LowerGEP;
+  bool CheckProfitability;
 };
 
 /// A pass that tries to split every GEP in the function into a variadic
@@ -376,15 +424,21 @@
   SeparateConstOffsetFromGEP(
       DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
       TargetLibraryInfo *TLI,
-      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
-      : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
+      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP,
+      bool CheckProfitability)
+      : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP),
+        CheckProfitability(CheckProfitability) {}
 
   bool run(Function &F);
 
 private:
   /// Tries to split the given GEP into a variadic base and a constant offset,
   /// and returns true if the splitting succeeds.
-  bool splitGEP(GetElementPtrInst *GEP);
+  bool splitGEP(GetElementPtrInst *GEP, int64_t AccumulativeByteOffset);
+
+  /// Canonicalize the GEP if needed and collect the information used to decide
+  /// whether modifying the GEP is useful.
+  bool preprocessGEP(GetElementPtrInst *GEP);
 
   /// Lower a GEP with multiple indices into multiple GEPs with a single index.
   /// Function splitGEP already split the original GEP into a variadic part and
@@ -408,10 +462,8 @@
 
   /// Finds the constant offset within each index and accumulates them. If
   /// LowerGEP is true, it finds in indices of both sequential and structure
-  /// types, otherwise it only finds in sequential indices. The output
-  /// NeedsExtraction indicates whether we successfully find a non-zero constant
-  /// offset.
-  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
+  /// types, otherwise it only finds in sequential indices.
+  void accumulateByteOffset(GetElementPtrInst *GEP);
 
   /// Canonicalize array indices to pointer-size integers. This helps to
   /// simplify the logic of splitting a GEP. For example, if a + b is a
@@ -473,8 +525,15 @@
   /// multiple GEPs with a single index.
   bool LowerGEP;
 
+  /// If true, only transform GEPs where the change is expected to be
+  /// profitable (i.e. to reduce register pressure); otherwise modify all
+  /// possible GEPs.
+  bool CheckProfitability;
+
   DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
   DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
+
+  /// GEP instructions chosen for transformation.
+  std::map<GEPBaseInfo, SmallVector<GEPInfo>> InstructionsToTransform;
 };
 
 } // end anonymous namespace
@@ -495,8 +554,10 @@
     "Split GEPs to a variadic base and a constant offset for better CSE", false,
     false)
 
-FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
-  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
+FunctionPass *
+llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP,
+                                           bool CheckProfitability) {
+  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP, CheckProfitability);
 }
 
 bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
@@ -564,29 +625,34 @@
   return true;
 }
 
-APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
-                                                   bool SignExtended,
-                                                   bool ZeroExtended) {
+APInt ConstantOffsetExtractor::findInEitherOperand(
+    BinaryOperator *BO, bool SignExtended, bool ZeroExtended,
+    Value *&NonConstantBaseValue) {
   // Save off the current height of the chain, in case we need to restore it.
   size_t ChainLength = UserChain.size();
 
   // BO being non-negative does not shed light on whether its operands are
   // non-negative. Clear the NonNegative flag here.
   APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
-                              /* NonNegative */ false);
+                              /* NonNegative */ false, NonConstantBaseValue);
 
   // If we found a constant offset in the left operand, stop and return that.
   // This shortcut might cause us to miss opportunities of combining the
   // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
   // However, such cases are probably already handled by -instcombine,
   // given this pass runs after the standard optimizations.
-  if (ConstantOffset != 0) return ConstantOffset;
+  if (ConstantOffset != 0) {
+    if (!isa<ConstantInt>(BO->getOperand(1))) {
+      NonConstantBaseValue = BO->getOperand(1);
+    }
+    return ConstantOffset;
+  }
 
   // Reset the chain back to where it was when we started exploring this node,
   // since visiting the LHS didn't pan out.
   UserChain.resize(ChainLength);
 
   ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
-                        /* NonNegative */ false);
+                        /* NonNegative */ false, NonConstantBaseValue);
   // If U is a sub operator, negate the constant offset found in the right
   // operand.
   if (BO->getOpcode() == Instruction::Sub)
@@ -596,11 +662,16 @@
   if (ConstantOffset == 0)
     UserChain.resize(ChainLength);
 
+  if (!isa<ConstantInt>(BO->getOperand(0))) {
+    NonConstantBaseValue = BO->getOperand(0);
+  }
+
   return ConstantOffset;
 }
 
 APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
-                                    bool ZeroExtended, bool NonNegative) {
+                                    bool ZeroExtended, bool NonNegative,
+                                    Value *&NonConstantBaseValue) {
   // TODO(jingyue): We could trace into integer/pointer casts, such as
   // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
   // integers because it gives good enough results for our benchmarks.
@@ -617,22 +688,25 @@
   } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
     // Trace into subexpressions for more hoisting opportunities.
     if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
-      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended,
+                                           NonConstantBaseValue);
   } else if (isa<TruncInst>(V)) {
-    ConstantOffset =
-        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
-            .trunc(BitWidth);
+    ConstantOffset = find(U->getOperand(0), SignExtended, ZeroExtended,
+                          NonNegative, NonConstantBaseValue)
+                         .trunc(BitWidth);
   } else if (isa<SExtInst>(V)) {
     ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
-                          ZeroExtended, NonNegative).sext(BitWidth);
+                          ZeroExtended, NonNegative, NonConstantBaseValue)
+                         .sext(BitWidth);
   } else if (isa<ZExtInst>(V)) {
     // As an optimization, we can clear the SignExtended flag because
     // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
     //
     // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
-    ConstantOffset =
-        find(U->getOperand(0), /* SignExtended */ false,
-             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
+    ConstantOffset = find(U->getOperand(0), /* SignExtended */ false,
+                          /* ZeroExtended */ true, /* NonNegative */ false,
+                          NonConstantBaseValue)
+                         .zext(BitWidth);
   }
 
   // If we found a non-zero constant offset, add it to the path for
@@ -768,10 +842,11 @@
                                         User *&UserChainTail,
                                         const DominatorTree *DT) {
   ConstantOffsetExtractor Extractor(GEP, DT);
+  Value *NonConstantBaseValue = nullptr;
   // Find a non-zero constant offset first.
   APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false,
                                         /* ZeroExtended */ false,
-                                        GEP->isInBounds());
+                                        GEP->isInBounds(), NonConstantBaseValue);
   if (ConstantOffset == 0) {
     UserChainTail = nullptr;
     return nullptr;
   }
@@ -783,11 +858,12 @@
 }
 
 int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
-                                      const DominatorTree *DT) {
+                                      const DominatorTree *DT,
+                                      Value *&NonConstantBaseValue) {
   // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
   return ConstantOffsetExtractor(GEP, DT)
       .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
-            GEP->isInBounds())
+            GEP->isInBounds(), NonConstantBaseValue)
       .getSExtValue();
 }
 
@@ -809,37 +885,56 @@
   return Changed;
 }
 
-int64_t
-SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
-                                                 bool &NeedsExtraction) {
-  NeedsExtraction = false;
+void SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP) {
   int64_t AccumulativeByteOffset = 0;
   gep_type_iterator GTI = gep_type_begin(*GEP);
+  SmallVector<Value *> ConstantIndices;
+  SmallVector<GEPBaseInfo> PossibleBases;
+
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+    Value *NonConstantBaseValue = nullptr;
     if (GTI.isSequential()) {
       // Tries to extract a constant offset from this GEP index.
-      int64_t ConstantOffset =
-          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
+      int64_t ConstantOffset = ConstantOffsetExtractor::Find(
+          GEP->getOperand(I), GEP, DT, NonConstantBaseValue);
       if (ConstantOffset != 0) {
-        NeedsExtraction = true;
+        if (CheckProfitability || PossibleBases.size() == 0) {
+          PossibleBases.emplace_back(
+              GEP->getPointerOperand(),
+              SmallVector<const Value *>(GEP->idx_begin(),
+                                         GEP->idx_begin() + I - 1),
+              NonConstantBaseValue);
+        }
+
         // A GEP may have multiple indices. We accumulate the extracted
         // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
         AccumulativeByteOffset +=
             ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+        ConstantIndices.push_back(GEP->getOperand(I));
       }
     } else if (LowerGEP) {
       StructType *StTy = GTI.getStructType();
       uint64_t Field =
           cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
       // Skip field 0 as the offset is always 0.
       if (Field != 0) {
-        NeedsExtraction = true;
+        if (CheckProfitability || PossibleBases.size() == 0) {
+          PossibleBases.emplace_back(GEP->getPointerOperand(),
+                                     SmallVector<const Value *>(),
+                                     NonConstantBaseValue);
+        }
         AccumulativeByteOffset +=
             DL->getStructLayout(StTy)->getElementOffset(Field);
       }
     }
   }
-  return AccumulativeByteOffset;
+  for (const GEPBaseInfo &Base : PossibleBases) {
+    if (InstructionsToTransform.find(Base) == InstructionsToTransform.end()) {
+      InstructionsToTransform[Base] = SmallVector<GEPInfo>();
+    }
+    InstructionsToTransform[Base].emplace_back(GEP, AccumulativeByteOffset,
+                                               std::move(ConstantIndices));
+  }
 }
 
 void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
@@ -913,9 +1008,8 @@
   Variadic->eraseFromParent();
 }
 
-void
-SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
-                                               int64_t AccumulativeByteOffset) {
+void SeparateConstOffsetFromGEP::lowerToArithmetics(
+    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
   IRBuilder<> Builder(Variadic);
   Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
 
@@ -959,7 +1053,7 @@
   Variadic->eraseFromParent();
 }
 
-bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
+bool SeparateConstOffsetFromGEP::preprocessGEP(GetElementPtrInst *GEP) {
   // Skip vector GEPs.
   if (GEP->getType()->isVectorTy())
     return false;
@@ -971,11 +1065,14 @@
 
   bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
 
-  bool NeedsExtraction;
-  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
+  accumulateByteOffset(GEP);
 
-  if (!NeedsExtraction)
-    return Changed;
+  return Changed;
+}
+
+bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP,
+                                          int64_t AccumulativeByteOffset) {
+  bool Changed = false;
 
   TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
 
@@ -992,6 +1089,9 @@
                                    /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                    /*HasBaseReg=*/true, /*Scale=*/0,
                                    AddrSpace)) {
+      LLVM_DEBUG(
+          dbgs()
+          << "Don't optimize: the backend does not support the addressing mode\n");
       return Changed;
     }
   }
@@ -1152,7 +1252,8 @@
   auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   };
-  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP,
+                                  CheckProfitability);
   return Impl.run(F);
 }
 
@@ -1162,15 +1263,85 @@
   DL = &F.getParent()->getDataLayout();
   bool Changed = false;
+
+  auto OnlyUsedInGEP = [](const GEPInfo &GEPInfo) {
+    bool OnlyUsedInGEP = GEPInfo.ConstantIndices.empty();
+    for (const Value *Index : GEPInfo.ConstantIndices) {
+      uint64_t NumUses = Index->getNumUses();
+      // In the case of a cast instruction, check the uses of both the result
+      // and the original value.
+      if (isa<SExtInst>(Index) || isa<ZExtInst>(Index) ||
+          isa<TruncInst>(Index)) {
+        NumUses += cast<Instruction>(Index)->getOperand(0)->getNumUses() - 1;
+      }
+      OnlyUsedInGEP |= NumUses == 1;
+    }
+    return OnlyUsedInGEP;
+  };
+
+  LLVM_DEBUG(dbgs() << "========= Function " << F.getName() << " =========\n");
   for (BasicBlock &B : F) {
+    InstructionsToTransform.clear();
     if (!DT->isReachableFromEntry(&B))
       continue;
 
     for (Instruction &I : llvm::make_early_inc_range(B))
       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
-        Changed |= splitGEP(GEP);
-    // No need to split GEP ConstantExprs because all its indices are constant
-    // already.
+        Changed |= preprocessGEP(GEP);
+
+    if (!CheckProfitability) {
+      for (const auto &GEPInfoPair : InstructionsToTransform) {
+        for (const auto &GEPInfo : GEPInfoPair.second) {
+          Changed |=
+              splitGEP(GEPInfo.GEPInstruction, GEPInfo.AccumulativeByteOffset);
+          // No need to split GEP ConstantExprs because all its indices are
+          // constant already.
+        }
+      }
+    } else {
+      // Since one instruction may be optimizable with several different bases,
+      // choose the best base according to the expected reduction in register
+      // pressure, and sort all found bases in decreasing order of that effect.
+      SmallVector<std::pair<SmallVector<GEPInfo>, unsigned>>
+          SortedInstructionsList;
+      for (const auto &GEPInfoPair : InstructionsToTransform) {
+        unsigned DeadValuesNumber = count_if(GEPInfoPair.second, OnlyUsedInGEP);
+        if (DeadValuesNumber > 0) {
+          SortedInstructionsList.emplace_back(GEPInfoPair.second,
+                                              DeadValuesNumber);
+        }
+      }
+      sort(SortedInstructionsList, [](const auto &LHS, const auto &RHS) {
+        if (LHS.second != RHS.second)
+          return LHS.second > RHS.second;
+        return LHS.first.size() > RHS.first.size();
+      });
+
+      // Optimize all chosen GEPs.
+      for (unsigned I = 0; I < SortedInstructionsList.size(); I++) {
+        auto DetailedInfoList = SortedInstructionsList[I].first;
+        if (DetailedInfoList.size() > 1 &&
+            any_of(DetailedInfoList, OnlyUsedInGEP)) {
+          for (const auto &GEPInfo : DetailedInfoList) {
+            LLVM_DEBUG(dbgs() << "Try to split GEP " << *GEPInfo.GEPInstruction
+                              << "\n");
+            bool CurrentChanged = splitGEP(GEPInfo.GEPInstruction,
+                                           GEPInfo.AccumulativeByteOffset);
+            Changed |= CurrentChanged;
+            // If the GEP has already been optimized, remove it from the lists
+            // associated with the other bases.
+            for (unsigned J = I + 1;
+                 J < SortedInstructionsList.size() && CurrentChanged; J++) {
+              auto RemoveIt = remove_if(SortedInstructionsList[J].first,
+                                        [&GEPInfo](const struct GEPInfo &Info) {
+                                          return Info.GEPInstruction ==
+                                                 GEPInfo.GEPInstruction;
+                                        });
+              SortedInstructionsList[J].first.erase(
+                  RemoveIt, SortedInstructionsList[J].first.end());
+            }
+          }
+        }
+      }
+    }
   }
 
   Changed |= reuniteExts(F);
@@ -1378,7 +1549,8 @@
   auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
     return AM.getResult<TargetIRAnalysis>(F);
   };
-  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP,
+                                  CheckProfitability);
   if (!Impl.run(F))
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/lit.local.cfg b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'RISCV' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll
@@ -0,0 +1,298 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=riscv64-unknown-elf -passes='separate-const-offset-from-gep<check-profit>,early-cse' \
+; RUN:   -S | FileCheck %s
+
+; Several tests for -separate-const-offset-from-gep. The transformation
+; heavily relies on TargetTransformInfo, so we put these tests under
+; target-specific folders.
+
+; Simple case when GEPs should be optimized.
+define dso_local signext i64 @test1(i64* nocapture noundef %Arr_1, i64 noundef signext %Int_1, i64 noundef signext %Int_2) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i64, i64* [[ARR_1:%.*]], i64 [[INT_1]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 5
+; CHECK-NEXT:    store i64 [[INT_2:%.*]], i64* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 6
+; CHECK-NEXT:    store i64 [[INT_2]], i64* [[ARRAYIDX26]], align 4
+; CHECK-NEXT:    [[ARRAYIDX38:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 35
+; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX38]], align 4
+; CHECK-NEXT:    ret i64 undef
+;
+entry:
+  %add = add nsw i64 %Int_1, 5
+  %arrayidx = getelementptr inbounds i64, i64* %Arr_1, i64 %add
+  store i64 %Int_2, i64* %arrayidx
+  %add2 = add nsw i64 %Int_1, 6
+  %arrayidx2 = getelementptr inbounds i64, i64* %Arr_1, i64 %add2
+  store i64 %Int_2, i64* %arrayidx2
+  %add3 = add nsw i64 %Int_1, 35
+  %arrayidx3 = getelementptr inbounds i64, i64* %Arr_1, i64 %add3
+  store i64 %add, i64* %arrayidx3
+  ret i64 undef
+}
+
+; Optimize GEPs when sext instructions are needed to cast the index value to the expected type.
+define dso_local signext i32 @test2(i32* nocapture noundef %Arr_1, i32 noundef signext %Int_1, i32 noundef signext %Int_2) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[ARR_1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
+; CHECK-NEXT:    store i32 [[INT_2:%.*]], i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX54:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
+; CHECK-NEXT:    store i32 [[INT_2]], i32* [[ARRAYIDX54]], align 4
+; CHECK-NEXT:    [[ARRAYIDX86:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 35
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX86]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %Int_2, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom4
+  store i32 %Int_2, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom7
+  store i32 %add, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to modify because all values are also used in other expressions.
+; Modification doesn't decrease register pressure.
+define dso_local signext i32 @test3(i32* nocapture noundef %Arr_1, i32 noundef signext %Int_1) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR_1:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_1]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom7
+  store i32 %add6, i32* %arrayidx8
+  ret i32 undef
+}
+
+; Optimize GEPs for a multidimensional array with the same base.
+define dso_local signext i32 @test4([50 x i32]* nocapture noundef %Arr_2, i32 noundef signext %Int_1) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARR_2:%.*]], i64 [[TMP0]], i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 255
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 256
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX56]], align 4
+; CHECK-NEXT:    [[ARRAYIDX89:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 285
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX89]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom
+  store i32 %Int_1, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom4
+  store i32 %add, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; Don't optimize GEPs for a multidimensional array with the same base because RISC-V doesn't support the addressing mode.
+define dso_local signext i32 @test5([50 x i32]* nocapture noundef %Arr_2, i32 noundef signext %Int_1, i64 noundef signext %Int_2) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARR_2:%.*]], i64 [[TMP0]], i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 255
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARR_2]], i64 [[TMP0]], i64 [[INT_2:%.*]]
+; CHECK-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 300
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX55]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_2]], i64 [[IDXPROM7]], i64 [[INT_2]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom4, i64 %Int_2
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom7, i64 %Int_2
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to optimize GEPs, because a critical number of them have non-constant offsets.
+define dso_local signext i64 @test6(i64* nocapture noundef %Arr_1, i64 noundef signext %Int_1, i64 noundef signext %Int_2) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[ARR_1:%.*]], i64 [[INT_2:%.*]]
+; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i64 [[INT_1]], 6
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ARR_1]], i64 [[ADD3]]
+; CHECK-NEXT:    store i64 [[INT_1]], i64* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[ARR_1]], i64 [[INT_1]]
+; CHECK-NEXT:    store i64 [[INT_1]], i64* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i64 undef
+;
+entry:
+  %add = add nsw i64 %Int_1, 5
+  %arrayidx = getelementptr inbounds i64, i64* %Arr_1, i64 %Int_2
+  store i64 %add, i64* %arrayidx
+  %add3 = add nsw i64 %Int_1, 6
+  %arrayidx5 = getelementptr inbounds i64, i64* %Arr_1, i64 %add3
+  store i64 %Int_1, i64* %arrayidx5
+  %add6 = add nsw i64 %Int_1, 35
+  %arrayidx8 = getelementptr inbounds i64, i64* %Arr_1, i64 %Int_1
+  store i64 %Int_1, i64* %arrayidx8
+  ret i64 undef
+}
+
+; No need to optimize GEPs, because the base variable is different.
+define dso_local signext i32 @test7(i32* nocapture noundef %Arr_1, i32 noundef signext %Int_1, i32 noundef signext %Int_2,
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR_1:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_3:%.*]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_2:%.*]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+    i32 noundef signext %Int_3) {
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_3, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom4
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_2, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to optimize GEPs, because the base of GEP instructions is different.
+define dso_local signext i32 @test8(i32* nocapture noundef %Arr_1, i32* nocapture noundef %Arr_2,
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR_1:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_1]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[ARR_2:%.*]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARR_3:%.*]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+    i32* nocapture noundef %Arr_3, i32 noundef signext %Int_1) {
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_2, i64 %idxprom4
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_3, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to optimize GEPs of a multidimensional array, because the base of GEP instructions is different.
+define dso_local signext i32 @test9([50 x i32]* nocapture noundef %Arr_1, i32 noundef signext %Int_1) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_1:%.*]], i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_1]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[INT:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_1]], i64 [[INT]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_1]], i64 [[IDXPROM4]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_1, i64 0, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %Int = sext i32 %Int_1 to i64
+  %arrayidx5 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_1, i64 %Int, i64 %idxprom4
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_1, i64 %idxprom4, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
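+
+; Usage sketch (an editorial illustration, not part of the original patch): the
+; <check-profit> parameter requested by the RUN line above maps to
+; CheckProfitability=true, which is the same configuration that
+; RISCVTargetMachine.cpp now schedules automatically at the aggressive
+; optimization level, e.g.:
+;   llc -O3 -mtriple=riscv64-unknown-elf split-gep.ll -o /dev/null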