diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -442,7 +442,9 @@
 //
 // SeparateConstOffsetFromGEP - Split GEPs for better CSE
 //
-FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false);
+FunctionPass *
+createSeparateConstOffsetFromGEPPass(bool LowerGEP = false,
+                                     bool CheckProfitability = false);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
--- a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
+++ b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
@@ -16,9 +16,12 @@
 class SeparateConstOffsetFromGEPPass
     : public PassInfoMixin<SeparateConstOffsetFromGEPPass> {
   bool LowerGEP;
+  bool CheckProfitability;
 
 public:
-  SeparateConstOffsetFromGEPPass(bool LowerGEP = false) : LowerGEP(LowerGEP) {}
+  SeparateConstOffsetFromGEPPass(bool LowerGEP = false,
+                                 bool CheckProfitability = false)
+      : LowerGEP(LowerGEP), CheckProfitability(CheckProfitability) {}
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
 };
 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -620,6 +620,11 @@
   return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics");
 }
 
+Expected<bool> parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) {
+  return parseSinglePassOption(Params, "check-profit",
+                               "SeparateConstOffsetFromGEP");
+}
+
 Expected<AddressSanitizerOptions> parseASanPassOptions(StringRef Params) {
   AddressSanitizerOptions Result;
   while (!Params.empty()) {
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -357,7 +357,6 @@
 FUNCTION_PASS("reg2mem", RegToMemPass())
 FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass())
 FUNCTION_PASS("scalarizer", ScalarizerPass())
-FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass())
 FUNCTION_PASS("sccp", SCCPPass())
 FUNCTION_PASS("sink", SinkingPass())
 FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
@@ -441,6 +440,13 @@
                           "no-sink-common-insts;sink-common-insts;"
                           "bonus-inst-threshold=N"
                          )
+FUNCTION_PASS_WITH_PARAMS("separate-const-offset-from-gep",
+                          "SeparateConstOffsetFromGEPPass",
+                          [](bool CheckProfitability) {
+                            return SeparateConstOffsetFromGEPPass(false, CheckProfitability);
+                          },
+                          parseSeparateConstOffsetFromGEPPassOptions,
+                          "check-profit")
 FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
                           "LoopVectorizePass",
                           [](LoopVectorizeOptions Opts) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 static cl::opt<bool> EnableRedundantCopyElimination(
@@ -167,7 +168,15 @@
   addPass(createAtomicExpandPass());
 
   addPass(createRISCVGatherScatterLoweringPass());
-
+  if (TM->getOptLevel() == CodeGenOpt::Aggressive) {
+    addPass(createSeparateConstOffsetFromGEPPass(false, true));
+    // Call EarlyCSE pass to find and remove subexpressions in the lowered
+    // result.
+    addPass(createEarlyCSEPass());
+    // Do loop invariant code motion in case part of the lowered result is
+    // invariant.
+    addPass(createLICMPass());
+  }
   TargetPassConfig::addIRPasses();
 }
 
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -187,17 +187,21 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
+#include <map>
 #include <string>
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+#define DEBUG_TYPE "separate-const-offset-from-gep"
+
 static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
     "disable-separate-const-offset-from-gep", cl::init(false),
     cl::desc("Do not separate the constant offset from a GEP instruction"),
@@ -242,7 +246,7 @@
   /// it. It returns the numeric value of the extracted constant offset (0 if
   /// failed). The meaning of the arguments are the same as Extract.
   static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
-                      const DominatorTree *DT);
+                      const DominatorTree *DT, Value *&NonConstantBaseValue);
 
 private:
   ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
@@ -254,20 +258,23 @@
   /// successful, returns C and update UserChain as a def-use chain from C to
   /// V; otherwise, UserChain is empty.
   ///
-  /// \p V            The given expression
-  /// \p SignExtended Whether V will be sign-extended in the computation of the
-  ///                 GEP index
-  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
-  ///                 GEP index
-  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
-  ///                 an index of an inbounds GEP is guaranteed to be
-  ///                 non-negative. Levaraging this, we can better split
-  ///                 inbounds GEPs.
-  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
+  /// \p V                    The given expression
+  /// \p SignExtended         Whether V will be sign-extended in the computation
+  ///                         of the GEP index
+  /// \p ZeroExtended         Whether V will be zero-extended in the computation
+  ///                         of the GEP index
+  /// \p NonNegative          Whether V is guaranteed to be non-negative. For
+  ///                         example, an index of an inbounds GEP is guaranteed
+  ///                         to be non-negative. Leveraging this, we can better
+  ///                         split inbounds GEPs.
+  /// \p NonConstantBaseValue The second, non-constant operand if V is a binary
+  ///                         operator.
+  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative,
+             Value *&NonConstantBaseValue);
 
   /// A helper function to look into both operands of a binary operator.
   APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
-                            bool ZeroExtended);
+                            bool ZeroExtended, Value *&NonConstantBaseValue);
 
   /// After finding the constant offset C from the GEP index I, we build a new
   /// index I' s.t. I' + C = I. This function builds and returns the new
@@ -340,6 +347,44 @@
   const DominatorTree *DT;
 };
 
+/// GEPBaseInfo - contains information about a possible common base for a group
+/// of GEP instructions.
+struct GEPBaseInfo {
+  /// Pointer operand used in the GEP instruction.
+  const Value *GEPPointer;
+  /// Indices that precede the index that can be optimized.
+  SmallVector<const Value *> PreviousIndices;
+  /// Non-constant value that will be used in the new base GEP.
+  const Value *NonConstantBaseValue;
+
+  GEPBaseInfo(const Value *GEPPointer,
+              SmallVector<const Value *> PreviousIndices,
+              const Value *NonConstantBaseValue)
+      : GEPPointer(GEPPointer), PreviousIndices(PreviousIndices),
+        NonConstantBaseValue(NonConstantBaseValue) {}
+
+  inline bool operator<(const GEPBaseInfo &rhs) const {
+    if (GEPPointer != rhs.GEPPointer)
+      return GEPPointer < rhs.GEPPointer;
+    if (NonConstantBaseValue != rhs.NonConstantBaseValue)
+      return NonConstantBaseValue < rhs.NonConstantBaseValue;
+    return PreviousIndices < rhs.PreviousIndices;
+  }
+};
+
+/// GEPInfo - contains the basic information about a GEP instruction that is
+/// needed to modify it.
+struct GEPInfo {
+  GetElementPtrInst *GEPInstruction;
+  int64_t AccumulativeByteOffset;
+  SmallVector<Value *> ConstantIndices;
+
+  GEPInfo(GetElementPtrInst *GEPInstruction, int64_t AccumulativeByteOffset,
+          SmallVector<Value *> &&Indices)
+      : GEPInstruction(GEPInstruction),
+        AccumulativeByteOffset(AccumulativeByteOffset),
+        ConstantIndices(std::move(Indices)) {}
+};
+
 /// A pass that tries to split every GEP in the function into a variadic
 /// base and a constant offset. It is a FunctionPass because searching for the
 /// constant offset may inspect other basic blocks.
@@ -347,8 +392,10 @@
 public:
   static char ID;
 
-  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
-      : FunctionPass(ID), LowerGEP(LowerGEP) {
+  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false,
+                                       bool CheckProfitability = false)
+      : FunctionPass(ID), LowerGEP(LowerGEP),
+        CheckProfitability(CheckProfitability) {
     initializeSeparateConstOffsetFromGEPLegacyPassPass(
         *PassRegistry::getPassRegistry());
   }
@@ -366,6 +413,7 @@
 
 private:
   bool LowerGEP;
+  bool CheckProfitability;
 };
 
 /// A pass that tries to split every GEP in the function into a variadic
@@ -376,15 +424,21 @@
   SeparateConstOffsetFromGEP(
       DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
       TargetLibraryInfo *TLI,
-      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
-      : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
+      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP,
+      bool CheckProfitability)
+      : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP),
+        CheckProfitability(CheckProfitability) {}
 
   bool run(Function &F);
 
 private:
   /// Tries to split the given GEP into a variadic base and a constant offset,
   /// and returns true if the splitting succeeds.
-  bool splitGEP(GetElementPtrInst *GEP);
+  bool splitGEP(GetElementPtrInst *GEP, int64_t AccumulativeByteOffset);
+
+  /// Canonicalize the GEP if needed and collect the information used to decide
+  /// whether modifying the GEP is useful.
+  bool preprocessGEP(GetElementPtrInst *GEP);
 
   /// Lower a GEP with multiple indices into multiple GEPs with a single index.
   /// Function splitGEP already split the original GEP into a variadic part and
@@ -408,10 +462,8 @@
 
   /// Finds the constant offset within each index and accumulates them. If
   /// LowerGEP is true, it finds in indices of both sequential and structure
-  /// types, otherwise it only finds in sequential indices. The output
-  /// NeedsExtraction indicates whether we successfully find a non-zero constant
-  /// offset.
-  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
+  /// types, otherwise it only finds in sequential indices.
+  void accumulateByteOffset(GetElementPtrInst *GEP);
 
   /// Canonicalize array indices to pointer-size integers. This helps to
   /// simplify the logic of splitting a GEP. For example, if a + b is a
@@ -473,8 +525,15 @@
   /// multiple GEPs with a single index.
   bool LowerGEP;
 
+  /// If true, only transform GEPs where the change is expected to be
+  /// profitable (i.e. to reduce register pressure); otherwise modify all
+  /// possible GEPs.
+  bool CheckProfitability;
+
   DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
   DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
+
+  /// GEP instructions chosen for transformation.
+  std::map<GEPBaseInfo, SmallVector<GEPInfo>> InstructionsToTransform;
 };
 
 } // end anonymous namespace
@@ -495,8 +554,10 @@
     "Split GEPs to a variadic base and a constant offset for better CSE", false,
     false)
 
-FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
-  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
+FunctionPass *
+llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP,
+                                           bool CheckProfitability) {
+  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP, CheckProfitability);
 }
 
 bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
@@ -564,29 +625,34 @@
   return true;
 }
 
-APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
-                                                   bool SignExtended,
-                                                   bool ZeroExtended) {
+APInt ConstantOffsetExtractor::findInEitherOperand(
+    BinaryOperator *BO, bool SignExtended, bool ZeroExtended,
+    Value *&NonConstantBaseValue) {
   // Save off the current height of the chain, in case we need to restore it.
   size_t ChainLength = UserChain.size();
 
   // BO being non-negative does not shed light on whether its operands are
   // non-negative. Clear the NonNegative flag here.
   APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
-                              /* NonNegative */ false);
+                              /* NonNegative */ false, NonConstantBaseValue);
 
   // If we found a constant offset in the left operand, stop and return that.
   // This shortcut might cause us to miss opportunities of combining the
   // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
   // However, such cases are probably already handled by -instcombine,
   // given this pass runs after the standard optimizations.
-  if (ConstantOffset != 0) return ConstantOffset;
+  if (ConstantOffset != 0) {
+    if (!isa<ConstantInt>(BO->getOperand(1))) {
+      NonConstantBaseValue = BO->getOperand(1);
+    }
+    return ConstantOffset;
+  }
 
   // Reset the chain back to where it was when we started exploring this node,
   // since visiting the LHS didn't pan out.
   UserChain.resize(ChainLength);
 
   ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
-                        /* NonNegative */ false);
+                        /* NonNegative */ false, NonConstantBaseValue);
   // If U is a sub operator, negate the constant offset found in the right
   // operand.
   if (BO->getOpcode() == Instruction::Sub)
@@ -596,11 +662,16 @@
   if (ConstantOffset == 0)
     UserChain.resize(ChainLength);
 
+  if (!isa<ConstantInt>(BO->getOperand(0))) {
+    NonConstantBaseValue = BO->getOperand(0);
+  }
+
   return ConstantOffset;
 }
 
 APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
-                                    bool ZeroExtended, bool NonNegative) {
+                                    bool ZeroExtended, bool NonNegative,
+                                    Value *&NonConstantBaseValue) {
   // TODO(jingyue): We could trace into integer/pointer casts, such as
   // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
   // integers because it gives good enough results for our benchmarks.
@@ -617,22 +688,25 @@
   } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
     // Trace into subexpressions for more hoisting opportunities.
     if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
-      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended,
+                                           NonConstantBaseValue);
   } else if (isa<TruncInst>(V)) {
-    ConstantOffset =
-        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
-            .trunc(BitWidth);
+    ConstantOffset = find(U->getOperand(0), SignExtended, ZeroExtended,
+                          NonNegative, NonConstantBaseValue)
+                         .trunc(BitWidth);
   } else if (isa<SExtInst>(V)) {
     ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
-                          ZeroExtended, NonNegative).sext(BitWidth);
+                          ZeroExtended, NonNegative, NonConstantBaseValue)
+                         .sext(BitWidth);
   } else if (isa<ZExtInst>(V)) {
     // As an optimization, we can clear the SignExtended flag because
     // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
     //
     // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
-    ConstantOffset =
-        find(U->getOperand(0), /* SignExtended */ false,
-             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
+    ConstantOffset = find(U->getOperand(0), /* SignExtended */ false,
+                          /* ZeroExtended */ true, /* NonNegative */ false,
+                          NonConstantBaseValue)
+                         .zext(BitWidth);
   }
 
   // If we found a non-zero constant offset, add it to the path for
@@ -768,10 +842,11 @@
                                         User *&UserChainTail,
                                         const DominatorTree *DT) {
   ConstantOffsetExtractor Extractor(GEP, DT);
+  Value *NonConstantBaseValue = nullptr;
   // Find a non-zero constant offset first.
   APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false,
                                         /* ZeroExtended */ false,
-                                        GEP->isInBounds());
+                                        GEP->isInBounds(), NonConstantBaseValue);
   if (ConstantOffset == 0) {
     UserChainTail = nullptr;
     return nullptr;
   }
@@ -783,11 +858,12 @@
 }
 
 int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
-                                      const DominatorTree *DT) {
+                                      const DominatorTree *DT,
+                                      Value *&NonConstantBaseValue) {
   // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
   return ConstantOffsetExtractor(GEP, DT)
       .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
-            GEP->isInBounds())
+            GEP->isInBounds(), NonConstantBaseValue)
       .getSExtValue();
 }
 
@@ -809,37 +885,56 @@
   return Changed;
 }
 
-int64_t
-SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
-                                                 bool &NeedsExtraction) {
-  NeedsExtraction = false;
+void SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP) {
   int64_t AccumulativeByteOffset = 0;
   gep_type_iterator GTI = gep_type_begin(*GEP);
+  SmallVector<Value *> ConstantIndices;
+  SmallVector<GEPBaseInfo> PossibleBases;
+
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+    Value *NonConstantBaseValue = nullptr;
     if (GTI.isSequential()) {
       // Tries to extract a constant offset from this GEP index.
-      int64_t ConstantOffset =
-          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
+      int64_t ConstantOffset = ConstantOffsetExtractor::Find(
+          GEP->getOperand(I), GEP, DT, NonConstantBaseValue);
       if (ConstantOffset != 0) {
-        NeedsExtraction = true;
+        if (CheckProfitability || PossibleBases.size() == 0) {
+          PossibleBases.emplace_back(
+              GEP->getPointerOperand(),
+              SmallVector<const Value *>(GEP->idx_begin(),
+                                         GEP->idx_begin() + I - 1),
+              NonConstantBaseValue);
+        }
+
         // A GEP may have multiple indices. We accumulate the extracted
         // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
         AccumulativeByteOffset +=
             ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+        ConstantIndices.push_back(GEP->getOperand(I));
       }
     } else if (LowerGEP) {
       StructType *StTy = GTI.getStructType();
       uint64_t Field =
           cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
       // Skip field 0 as the offset is always 0.
       if (Field != 0) {
-        NeedsExtraction = true;
+        if (CheckProfitability || PossibleBases.size() == 0) {
+          PossibleBases.emplace_back(GEP->getPointerOperand(),
+                                     SmallVector<const Value *>(),
+                                     NonConstantBaseValue);
+        }
         AccumulativeByteOffset +=
             DL->getStructLayout(StTy)->getElementOffset(Field);
       }
     }
   }
-  return AccumulativeByteOffset;
+  for (const GEPBaseInfo &Base : PossibleBases) {
+    if (InstructionsToTransform.find(Base) == InstructionsToTransform.end()) {
+      InstructionsToTransform[Base] = SmallVector<GEPInfo>();
+    }
+    InstructionsToTransform[Base].emplace_back(GEP, AccumulativeByteOffset,
+                                               std::move(ConstantIndices));
+  }
 }
 
 void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
@@ -913,9 +1008,8 @@
   Variadic->eraseFromParent();
 }
 
-void
-SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
-                                               int64_t AccumulativeByteOffset) {
+void SeparateConstOffsetFromGEP::lowerToArithmetics(
+    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
   IRBuilder<> Builder(Variadic);
   Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
 
@@ -959,7 +1053,7 @@
   Variadic->eraseFromParent();
 }
 
-bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
+bool SeparateConstOffsetFromGEP::preprocessGEP(GetElementPtrInst *GEP) {
   // Skip vector GEPs.
   if (GEP->getType()->isVectorTy())
     return false;
@@ -971,11 +1065,14 @@
 
   bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
 
-  bool NeedsExtraction;
-  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
+  accumulateByteOffset(GEP);
 
-  if (!NeedsExtraction)
-    return Changed;
+  return Changed;
+}
+
+bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP,
+                                          int64_t AccumulativeByteOffset) {
+  bool Changed = false;
 
   TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
 
@@ -992,6 +1089,9 @@
                                    /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                    /*HasBaseReg=*/true, /*Scale=*/0,
                                    AddrSpace)) {
+      LLVM_DEBUG(
+          dbgs()
+          << "Don't optimize: the backend does not support the addressing mode\n");
       return Changed;
     }
   }
@@ -1152,7 +1252,8 @@
   auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   };
-  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP,
+                                  CheckProfitability);
   return Impl.run(F);
 }
 
@@ -1162,15 +1263,85 @@
   DL = &F.getParent()->getDataLayout();
   bool Changed = false;
+
+  auto OnlyUsedInGEP = [](const GEPInfo &GEPInfo) {
+    bool OnlyUsedInGEP = GEPInfo.ConstantIndices.empty();
+    for (const Value *Index : GEPInfo.ConstantIndices) {
+      uint64_t NumUses = Index->getNumUses();
+      // In the case of a cast instruction, check the uses of both the result
+      // and the original value.
+      if (isa<SExtInst>(Index) || isa<ZExtInst>(Index) ||
+          isa<TruncInst>(Index)) {
+        NumUses += cast<Instruction>(Index)->getOperand(0)->getNumUses() - 1;
+      }
+      OnlyUsedInGEP |= NumUses == 1;
+    }
+    return OnlyUsedInGEP;
+  };
+
+  LLVM_DEBUG(dbgs() << "========= Function " << F.getName() << " =========\n");
   for (BasicBlock &B : F) {
+    InstructionsToTransform.clear();
     if (!DT->isReachableFromEntry(&B))
       continue;
 
     for (Instruction &I : llvm::make_early_inc_range(B))
       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
-        Changed |= splitGEP(GEP);
-    // No need to split GEP ConstantExprs because all its indices are constant
-    // already.
+        Changed |= preprocessGEP(GEP);
+
+    if (!CheckProfitability) {
+      for (const auto &GEPInfoPair : InstructionsToTransform) {
+        for (const auto &GEPInfo : GEPInfoPair.second) {
+          Changed |=
+              splitGEP(GEPInfo.GEPInstruction, GEPInfo.AccumulativeByteOffset);
+          // No need to split GEP ConstantExprs because all its indices are
+          // constant already.
+        }
+      }
+    } else {
+      // Since one instruction may be optimizable with several different bases,
+      // choose the best base according to the expected reduction in register
+      // pressure, and sort all found bases in decreasing order of that effect.
+      SmallVector<std::pair<SmallVector<GEPInfo>, unsigned>>
+          SortedInstructionsList;
+      for (const auto &GEPInfoPair : InstructionsToTransform) {
+        unsigned DeadValuesNumber = count_if(GEPInfoPair.second, OnlyUsedInGEP);
+        if (DeadValuesNumber > 0) {
+          SortedInstructionsList.emplace_back(GEPInfoPair.second,
+                                              DeadValuesNumber);
+        }
+      }
+      sort(SortedInstructionsList, [](const auto &LHS, const auto &RHS) {
+        if (LHS.second != RHS.second)
+          return LHS.second > RHS.second;
+        return LHS.first.size() > RHS.first.size();
+      });
+
+      // Optimize all chosen GEPs.
+      for (unsigned I = 0; I < SortedInstructionsList.size(); I++) {
+        auto DetailedInfoList = SortedInstructionsList[I].first;
+        if (DetailedInfoList.size() > 1 &&
+            any_of(DetailedInfoList, OnlyUsedInGEP)) {
+          for (const auto &GEPInfo : DetailedInfoList) {
+            LLVM_DEBUG(dbgs() << "Try to split GEP " << *GEPInfo.GEPInstruction
+                              << "\n");
+            bool CurrentChanged = splitGEP(GEPInfo.GEPInstruction,
+                                           GEPInfo.AccumulativeByteOffset);
+            Changed |= CurrentChanged;
+            // If the GEP has already been optimized, remove it from the lists
+            // associated with the other bases.
+            for (unsigned J = I + 1;
+                 J < SortedInstructionsList.size() && CurrentChanged; J++) {
+              auto RemoveIt = remove_if(SortedInstructionsList[J].first,
+                                        [&GEPInfo](const struct GEPInfo &Info) {
+                                          return Info.GEPInstruction ==
+                                                 GEPInfo.GEPInstruction;
+                                        });
+              SortedInstructionsList[J].first.erase(
+                  RemoveIt, SortedInstructionsList[J].first.end());
+            }
+          }
+        }
+      }
+    }
   }
 
   Changed |= reuniteExts(F);
@@ -1378,7 +1549,8 @@
   auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
     return AM.getResult<TargetIRAnalysis>(F);
   };
-  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP,
+                                  CheckProfitability);
   if (!Impl.run(F))
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/lit.local.cfg b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'RISCV' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll
@@ -0,0 +1,298 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=riscv64-unknown-elf -passes='separate-const-offset-from-gep<check-profit>,early-cse' \
+; RUN:   -S | FileCheck %s
+
+; Several tests for -separate-const-offset-from-gep. The transformation
+; heavily relies on TargetTransformInfo, so we put these tests under
+; target-specific folders.
+
+; Simple case when GEPs should be optimized.
+define dso_local signext i64 @test1(i64* nocapture noundef %Arr_1, i64 noundef signext %Int_1, i64 noundef signext %Int_2) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i64, i64* [[ARR_1:%.*]], i64 [[INT_1]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 5
+; CHECK-NEXT:    store i64 [[INT_2:%.*]], i64* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 6
+; CHECK-NEXT:    store i64 [[INT_2]], i64* [[ARRAYIDX26]], align 4
+; CHECK-NEXT:    [[ARRAYIDX38:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 35
+; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX38]], align 4
+; CHECK-NEXT:    ret i64 undef
+;
+entry:
+  %add = add nsw i64 %Int_1, 5
+  %arrayidx = getelementptr inbounds i64, i64* %Arr_1, i64 %add
+  store i64 %Int_2, i64* %arrayidx
+  %add2 = add nsw i64 %Int_1, 6
+  %arrayidx2 = getelementptr inbounds i64, i64* %Arr_1, i64 %add2
+  store i64 %Int_2, i64* %arrayidx2
+  %add3 = add nsw i64 %Int_1, 35
+  %arrayidx3 = getelementptr inbounds i64, i64* %Arr_1, i64 %add3
+  store i64 %add, i64* %arrayidx3
+  ret i64 undef
+}
+
+; Optimize GEPs when sext instructions are needed to cast the index value to the expected type.
+define dso_local signext i32 @test2(i32* nocapture noundef %Arr_1, i32 noundef signext %Int_1, i32 noundef signext %Int_2) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[ARR_1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
+; CHECK-NEXT:    store i32 [[INT_2:%.*]], i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX54:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
+; CHECK-NEXT:    store i32 [[INT_2]], i32* [[ARRAYIDX54]], align 4
+; CHECK-NEXT:    [[ARRAYIDX86:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 35
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX86]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %Int_2, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom4
+  store i32 %Int_2, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom7
+  store i32 %add, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to modify because all values are also used in other expressions.
+; Modification doesn't decrease register pressure.
+define dso_local signext i32 @test3(i32* nocapture noundef %Arr_1, i32 noundef signext %Int_1) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR_1:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_1]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom7
+  store i32 %add6, i32* %arrayidx8
+  ret i32 undef
+}
+
+; Optimize GEPs for a multidimensional array with the same base.
+define dso_local signext i32 @test4([50 x i32]* nocapture noundef %Arr_2, i32 noundef signext %Int_1) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARR_2:%.*]], i64 [[TMP0]], i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 255
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 256
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX56]], align 4
+; CHECK-NEXT:    [[ARRAYIDX89:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 285
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX89]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom
+  store i32 %Int_1, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom4
+  store i32 %add, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; Don't optimize GEPs for a multidimensional array with the same base because RISC-V doesn't support the addressing mode.
+define dso_local signext i32 @test5([50 x i32]* nocapture noundef %Arr_2, i32 noundef signext %Int_1, i64 noundef signext %Int_2) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARR_2:%.*]], i64 [[TMP0]], i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 255
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARR_2]], i64 [[TMP0]], i64 [[INT_2:%.*]]
+; CHECK-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 300
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX55]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_2]], i64 [[IDXPROM7]], i64 [[INT_2]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom4, i64 %Int_2
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_2, i64 %idxprom7, i64 %Int_2
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to optimize GEPs, because a critical number of them have non-constant offsets.
+define dso_local signext i64 @test6(i64* nocapture noundef %Arr_1, i64 noundef signext %Int_1, i64 noundef signext %Int_2) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[ARR_1:%.*]], i64 [[INT_2:%.*]]
+; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i64 [[INT_1]], 6
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ARR_1]], i64 [[ADD3]]
+; CHECK-NEXT:    store i64 [[INT_1]], i64* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[ARR_1]], i64 [[INT_1]]
+; CHECK-NEXT:    store i64 [[INT_1]], i64* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i64 undef
+;
+entry:
+  %add = add nsw i64 %Int_1, 5
+  %arrayidx = getelementptr inbounds i64, i64* %Arr_1, i64 %Int_2
+  store i64 %add, i64* %arrayidx
+  %add3 = add nsw i64 %Int_1, 6
+  %arrayidx5 = getelementptr inbounds i64, i64* %Arr_1, i64 %add3
+  store i64 %Int_1, i64* %arrayidx5
+  %add6 = add nsw i64 %Int_1, 35
+  %arrayidx8 = getelementptr inbounds i64, i64* %Arr_1, i64 %Int_1
+  store i64 %Int_1, i64* %arrayidx8
+  ret i64 undef
+}
+
+; No need to optimize GEPs, because the base variable is different.
+define dso_local signext i32 @test7(i32* nocapture noundef %Arr_1, i32 noundef signext %Int_1, i32 noundef signext %Int_2,
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR_1:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_3:%.*]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_2:%.*]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARR_1]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+    i32 noundef signext %Int_3) {
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_3, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom4
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_2, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to optimize GEPs, because the base of GEP instructions is different.
+define dso_local signext i32 @test8(i32* nocapture noundef %Arr_1, i32* nocapture noundef %Arr_2,
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR_1:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_1]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[ARR_2:%.*]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARR_3:%.*]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+    i32* nocapture noundef %Arr_3, i32 noundef signext %Int_1) {
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %Arr_1, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %Arr_2, i64 %idxprom4
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %Arr_3, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
+
+; No need to optimize GEPs of a multidimensional array, because the base of GEP instructions is different.
+define dso_local signext i32 @test9([50 x i32]* nocapture noundef %Arr_1, i32 noundef signext %Int_1) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INT_1:%.*]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_1:%.*]], i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[INT_1]], 6
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
+; CHECK-NEXT:    [[INT:%.*]] = sext i32 [[INT_1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_1]], i64 [[INT]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[INT_1]], 35
+; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARR_1]], i64 [[IDXPROM4]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    store i32 [[INT_1]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  %add = add nsw i32 %Int_1, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_1, i64 0, i64 %idxprom
+  store i32 %add, i32* %arrayidx
+  %add3 = add nsw i32 %Int_1, 6
+  %idxprom4 = sext i32 %add3 to i64
+  %Int = sext i32 %Int_1 to i64
+  %arrayidx5 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_1, i64 %Int, i64 %idxprom4
+  store i32 %Int_1, i32* %arrayidx5
+  %add6 = add nsw i32 %Int_1, 35
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds [50 x i32], [50 x i32]* %Arr_1, i64 %idxprom4, i64 %idxprom7
+  store i32 %Int_1, i32* %arrayidx8
+  ret i32 undef
+}
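+
+; Usage sketch (an editorial illustration, not part of the original patch): the
+; <check-profit> parameter requested by the RUN line above maps to
+; CheckProfitability=true, which is the same configuration that
+; RISCVTargetMachine.cpp now schedules automatically at the aggressive
+; optimization level, e.g.:
+;   llc -O3 -mtriple=riscv64-unknown-elf split-gep.ll -o /dev/null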