diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -442,7 +442,9 @@ // // SeparateConstOffsetFromGEP - Split GEPs for better CSE // -FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false); +FunctionPass * +createSeparateConstOffsetFromGEPPass(bool LowerGEP = false, + bool CheckProfitability = false); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h --- a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h +++ b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h @@ -16,9 +16,12 @@ class SeparateConstOffsetFromGEPPass : public PassInfoMixin<SeparateConstOffsetFromGEPPass> { bool LowerGEP; + bool CheckProfitability; public: - SeparateConstOffsetFromGEPPass(bool LowerGEP = false) : LowerGEP(LowerGEP) {} + SeparateConstOffsetFromGEPPass(bool LowerGEP = false, + bool CheckProfitability = false) + : LowerGEP(LowerGEP), CheckProfitability(CheckProfitability) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &); }; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -620,6 +620,11 @@ return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics"); } +Expected<bool> parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) { + return parseSinglePassOption(Params, "check-profit", + "SeparateConstOffsetFromGEP"); +} + Expected<AddressSanitizerOptions> parseASanPassOptions(StringRef Params) { AddressSanitizerOptions Result; while (!Params.empty()) { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -357,7 +357,6 @@ FUNCTION_PASS("reg2mem", RegToMemPass()) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) -FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) @@ -441,6 +440,13 @@ "no-sink-common-insts;sink-common-insts;" "bonus-inst-threshold=N" ) +FUNCTION_PASS_WITH_PARAMS("separate-const-offset-from-gep", + "SeparateConstOffsetFromGEPPass", + [](bool CheckProfitability) { + return SeparateConstOffsetFromGEPPass(false, CheckProfitability); + }, + parseSeparateConstOffsetFromGEPPassOptions, + "check-profit") FUNCTION_PASS_WITH_PARAMS("loop-vectorize", "LoopVectorizePass", [](LoopVectorizeOptions Opts) { diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -187,17 +187,21 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <cassert> #include <cstdint> +#include <map> #include <string> using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "separate-const-offset-from-gep" + static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), @@ -242,7 +246,7 @@ /// it. It returns the numeric value of the extracted constant offset (0 if /// failed). The meaning of the arguments are the same as Extract. static int64_t Find(Value *Idx, GetElementPtrInst *GEP, - const DominatorTree *DT); + const DominatorTree *DT, Value *&NonConstantBaseValue); private: ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT) @@ -254,20 +258,23 @@ /// successful, returns C and update UserChain as a def-use chain from C to V; /// otherwise, UserChain is empty. /// - /// \p V The given expression - /// \p SignExtended Whether V will be sign-extended in the computation of the - /// GEP index - /// \p ZeroExtended Whether V will be zero-extended in the computation of the - /// GEP index - /// \p NonNegative Whether V is guaranteed to be non-negative. For example, - /// an index of an inbounds GEP is guaranteed to be - /// non-negative. Levaraging this, we can better split - /// inbounds GEPs. - APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative); + /// \p V The given expression + /// \p SignExtended Whether V will be sign-extended in the computation + /// of the GEP index + /// \p ZeroExtended Whether V will be zero-extended in the computation + /// of the GEP index + /// \p NonNegative Whether V is guaranteed to be non-negative. For + /// example, an index of an inbounds GEP is guaranteed + /// to be non-negative. Leveraging this, we can better + /// split inbounds GEPs. + /// \p NonConstantBaseValue Set to the remaining non-constant operand if V + /// is a binary operator. + APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative, + Value *&NonConstantBaseValue); /// A helper function to look into both operands of a binary operator. APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, - bool ZeroExtended); + bool ZeroExtended, Value *&NonConstantBaseValue); /// After finding the constant offset C from the GEP index I, we build a new /// index I' s.t. I' + C = I. This function builds and returns the new @@ -340,6 +347,44 @@ const DominatorTree *DT; }; +/// GEPBaseInfo - holds information about a possible common base for +/// GEP instructions. +struct GEPBaseInfo { + /// Pointer operand used in the GEP instruction. + const Value *GEPPointer; + /// Indices that precede the index that can be optimized. + SmallVector PreviousIndices; + /// Non-constant value that will be used in the new base GEP. + const Value *NonConstantBaseValue; + + GEPBaseInfo(const Value *GEPPointer, + SmallVector PreviousIndices, + const Value *NonConstantBaseValue) + : GEPPointer(GEPPointer), PreviousIndices(PreviousIndices), + NonConstantBaseValue(NonConstantBaseValue) {} + + inline bool operator<(const GEPBaseInfo &RHS) const { + if (GEPPointer != RHS.GEPPointer) + return GEPPointer < RHS.GEPPointer; + if (NonConstantBaseValue != RHS.NonConstantBaseValue) + return NonConstantBaseValue < RHS.NonConstantBaseValue; + return PreviousIndices < RHS.PreviousIndices; + } +}; + +/// GEPInfo - basic information about a GEP instruction that is needed +/// for its modification.
+struct GEPInfo { + GetElementPtrInst *GEPInstruction; + int64_t AccumulativeByteOffset; + SmallVector ConstantIndices; + + GEPInfo(GetElementPtrInst *GEPInstruction, int64_t AccumulativeByteOffset, + SmallVector &&Indices) + : GEPInstruction(GEPInstruction), + AccumulativeByteOffset(AccumulativeByteOffset), + ConstantIndices(Indices) {} +}; + /// A pass that tries to split every GEP in the function into a variadic /// base and a constant offset. It is a FunctionPass because searching for the /// constant offset may inspect other basic blocks. @@ -347,8 +392,10 @@ public: static char ID; - SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false) - : FunctionPass(ID), LowerGEP(LowerGEP) { + SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false, + bool CheckProfitability = false) + : FunctionPass(ID), LowerGEP(LowerGEP), + CheckProfitability(CheckProfitability) { initializeSeparateConstOffsetFromGEPLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -366,6 +413,7 @@ private: bool LowerGEP; + bool CheckProfitability; }; /// A pass that tries to split every GEP in the function into a variadic @@ -376,15 +424,21 @@ SeparateConstOffsetFromGEP( DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, TargetLibraryInfo *TLI, - function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP) - : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {} + function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP, + bool CheckProfitability) + : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP), + CheckProfitability(CheckProfitability) {} bool run(Function &F); private: /// Tries to split the given GEP into a variadic base and a constant offset, /// and returns true if the splitting succeeds. - bool splitGEP(GetElementPtrInst *GEP); + bool splitGEP(GetElementPtrInst *GEP, int64_t AccumulativeByteOffset); + + /// Canonicalize the GEP if needed and collect the information used to decide + /// whether modifying this GEP is useful. + bool preprocessGEP(GetElementPtrInst *GEP); /// Lower a GEP with multiple indices into multiple GEPs with a single index. /// Function splitGEP already split the original GEP into a variadic part and @@ -408,10 +462,8 @@ /// Finds the constant offset within each index and accumulates them. If /// LowerGEP is true, it finds in indices of both sequential and structure - /// types, otherwise it only finds in sequential indices. The output - /// NeedsExtraction indicates whether we successfully find a non-zero constant - /// offset. - int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction); + /// types, otherwise it only finds in sequential indices. + void accumulateByteOffset(GetElementPtrInst *GEP); /// Canonicalize array indices to pointer-size integers. This helps to /// simplify the logic of splitting a GEP. For example, if a + b is a @@ -473,8 +525,15 @@ /// multiple GEPs with a single index. bool LowerGEP; + /// If true, split a GEP only when doing so is likely to reduce register + /// pressure; otherwise split every eligible GEP.
+ bool CheckProfitability; + DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds; DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs; + + /// GEP instructions chosen for transformation. + std::map> InstructionsToTransform; }; } // end anonymous namespace @@ -495,8 +554,10 @@ "Split GEPs to a variadic base and a constant offset for better CSE", false, false) -FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) { - return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP); +FunctionPass * +llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP, + bool CheckProfitability) { + return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP, CheckProfitability); } bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, @@ -564,29 +625,34 @@ return true; } -APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO, - bool SignExtended, - bool ZeroExtended) { +APInt ConstantOffsetExtractor::findInEitherOperand( + BinaryOperator *BO, bool SignExtended, bool ZeroExtended, + Value *&NonConstantBaseValue) { // Save off the current height of the chain, in case we need to restore it. size_t ChainLength = UserChain.size(); // BO being non-negative does not shed light on whether its operands are // non-negative. Clear the NonNegative flag here. APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended, - /* NonNegative */ false); + /* NonNegative */ false, NonConstantBaseValue); // If we found a constant offset in the left operand, stop and return that. // This shortcut might cause us to miss opportunities of combining the // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. // However, such cases are probably already handled by -instcombine, // given this pass runs after the standard optimizations. - if (ConstantOffset != 0) return ConstantOffset; + if (ConstantOffset != 0) { + if (!isa(BO->getOperand(1))) { + NonConstantBaseValue = BO->getOperand(1); + } + return ConstantOffset; + } // Reset the chain back to where it was when we started exploring this node, // since visiting the LHS didn't pan out. UserChain.resize(ChainLength); ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended, - /* NonNegative */ false); + /* NonNegative */ false, NonConstantBaseValue); // If U is a sub operator, negate the constant offset found in the right // operand. if (BO->getOpcode() == Instruction::Sub) @@ -596,11 +662,16 @@ if (ConstantOffset == 0) UserChain.resize(ChainLength); + if (!isa(BO->getOperand(0))) { + NonConstantBaseValue = BO->getOperand(0); + } + return ConstantOffset; } APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended, - bool ZeroExtended, bool NonNegative) { + bool ZeroExtended, bool NonNegative, + Value *&NonConstantBaseValue) { // TODO(jingyue): We could trace into integer/pointer casts, such as // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only // integers because it gives good enough results for our benchmarks. @@ -617,22 +688,25 @@ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) { // Trace into subexpressions for more hoisting opportunities.
if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) - ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended); + ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended, + NonConstantBaseValue); } else if (isa<TruncInst>(V)) { - ConstantOffset = - find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative) - .trunc(BitWidth); + ConstantOffset = find(U->getOperand(0), SignExtended, ZeroExtended, + NonNegative, NonConstantBaseValue) + .trunc(BitWidth); } else if (isa<SExtInst>(V)) { ConstantOffset = find(U->getOperand(0), /* SignExtended */ true, - ZeroExtended, NonNegative).sext(BitWidth); + ZeroExtended, NonNegative, NonConstantBaseValue) + .sext(BitWidth); } else if (isa<ZExtInst>(V)) { // As an optimization, we can clear the SignExtended flag because // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll. // // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0. - ConstantOffset = - find(U->getOperand(0), /* SignExtended */ false, - /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth); + ConstantOffset = find(U->getOperand(0), /* SignExtended */ false, + /* ZeroExtended */ true, /* NonNegative */ false, + NonConstantBaseValue) + .zext(BitWidth); } // If we found a non-zero constant offset, add it to the path for @@ -768,10 +842,11 @@ User *&UserChainTail, const DominatorTree *DT) { ConstantOffsetExtractor Extractor(GEP, DT); + Value *NonConstantBaseValue = nullptr; // Find a non-zero constant offset first. APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, - GEP->isInBounds()); + GEP->isInBounds(), NonConstantBaseValue); if (ConstantOffset == 0) { UserChainTail = nullptr; return nullptr; } @@ -783,11 +858,12 @@ } int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP, - const DominatorTree *DT) { + const DominatorTree *DT, + Value *&NonConstantBaseValue) { // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative. return ConstantOffsetExtractor(GEP, DT) .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, - GEP->isInBounds()) + GEP->isInBounds(), NonConstantBaseValue) .getSExtValue(); } @@ -809,37 +885,77 @@ return Changed; } -int64_t -SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, - bool &NeedsExtraction) { - NeedsExtraction = false; +void SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP) { int64_t AccumulativeByteOffset = 0; gep_type_iterator GTI = gep_type_begin(*GEP); + SmallVector ConstantIndices; + SmallVector PossibleBases; + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + Value *NonConstantBaseValue = nullptr; if (GTI.isSequential()) { // Tries to extract a constant offset from this GEP index. - int64_t ConstantOffset = - ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT); + int64_t ConstantOffset = ConstantOffsetExtractor::Find( + GEP->getOperand(I), GEP, DT, NonConstantBaseValue); if (ConstantOffset != 0) { - NeedsExtraction = true; + if (CheckProfitability || PossibleBases.empty()) { + PossibleBases.emplace_back( + GEP->getPointerOperand(), + SmallVector(GEP->idx_begin(), + GEP->idx_begin() + I - 1), + NonConstantBaseValue); + } + // A GEP may have multiple indices. We accumulate the extracted // constant offset to a byte offset, and later offset the remainder of // the original GEP with this byte offset.
AccumulativeByteOffset += ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType()); + ConstantIndices.push_back(GEP->getOperand(I)); } } else if (LowerGEP) { StructType *StTy = GTI.getStructType(); uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue(); // Skip field 0 as the offset is always 0. if (Field != 0) { - NeedsExtraction = true; + if (CheckProfitability || PossibleBases.empty()) { + PossibleBases.emplace_back(GEP->getPointerOperand(), + SmallVector(), + NonConstantBaseValue); + } AccumulativeByteOffset += DL->getStructLayout(StTy)->getElementOffset(Field); } } } - return AccumulativeByteOffset; + TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); + + // If LowerGEP is disabled, before really splitting the GEP, check whether the + // backend supports the addressing mode we are about to produce. If no, this + // splitting probably won't be beneficial. + // If LowerGEP is enabled, even if the extracted constant offset cannot match + // the addressing mode, we can still do optimizations to other lowered parts + // of variable indices. Therefore, we don't check for addressing modes in that + // case. + if (!LowerGEP) { + unsigned AddrSpace = GEP->getPointerAddressSpace(); + if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), + /*BaseGV=*/nullptr, AccumulativeByteOffset, + /*HasBaseReg=*/true, /*Scale=*/0, + AddrSpace)) { + LLVM_DEBUG( + dbgs() + << "Don't optimize. The backend doesn't support the addressing mode\n"); + return; + } + } + for (const GEPBaseInfo &Base : PossibleBases) { + if (InstructionsToTransform.find(Base) == InstructionsToTransform.end()) { + InstructionsToTransform[Base] = SmallVector(); + } + InstructionsToTransform[Base].emplace_back(GEP, AccumulativeByteOffset, + std::move(ConstantIndices)); + } } void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( @@ -913,9 +1029,8 @@ Variadic->eraseFromParent(); } -void -SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, - int64_t AccumulativeByteOffset) { +void SeparateConstOffsetFromGEP::lowerToArithmetics( + GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); @@ -959,7 +1074,7 @@ Variadic->eraseFromParent(); } -bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { +bool SeparateConstOffsetFromGEP::preprocessGEP(GetElementPtrInst *GEP) { // Skip vector GEPs. if (GEP->getType()->isVectorTy()) return false; @@ -971,31 +1086,15 @@ bool Changed = canonicalizeArrayIndicesToPointerSize(GEP); - bool NeedsExtraction; - int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); + accumulateByteOffset(GEP); - if (!NeedsExtraction) - return Changed; + return Changed; +} +bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP, + int64_t AccumulativeByteOffset) { TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); - // If LowerGEP is disabled, before really splitting the GEP, check whether the - // backend supports the addressing mode we are about to produce. If no, this - // splitting probably won't be beneficial. - // If LowerGEP is enabled, even the extracted constant offset can not match - // the addressing mode, we can still do optimizations to other lowered parts - // of variable indices. Therefore, we don't check for addressing modes in that - // case.
- if (!LowerGEP) { - unsigned AddrSpace = GEP->getPointerAddressSpace(); - if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), - /*BaseGV=*/nullptr, AccumulativeByteOffset, - /*HasBaseReg=*/true, /*Scale=*/0, - AddrSpace)) { - return Changed; - } - } - // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. // Notice that we don't remove struct field indices here. If LowerGEP is @@ -1152,7 +1251,8 @@ auto GetTTI = [this](Function &F) -> TargetTransformInfo & { return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP, + CheckProfitability); return Impl.run(F); } @@ -1162,15 +1262,85 @@ DL = &F.getParent()->getDataLayout(); bool Changed = false; + + auto OnlyUsedInGEP = [](const GEPInfo &GEPInfo) { + bool OnlyUsedInGEP = GEPInfo.ConstantIndices.empty(); + for (const Value *Index : GEPInfo.ConstantIndices) { + uint64_t NumUses = Index->getNumUses(); + // In case of a cast instruction, check the uses of both the result and + // the original value. + if (isa(Index) || isa(Index) || + isa(Index)) { + NumUses += cast(Index)->getOperand(0)->getNumUses() - 1; + } + OnlyUsedInGEP |= NumUses == 1; + } + return OnlyUsedInGEP; + }; + + LLVM_DEBUG(dbgs() << "========= Function " << F.getName() << " =========\n"); for (BasicBlock &B : F) { + InstructionsToTransform.clear(); if (!DT->isReachableFromEntry(&B)) continue; for (Instruction &I : llvm::make_early_inc_range(B)) if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) - Changed |= splitGEP(GEP); - // No need to split GEP ConstantExprs because all its indices are constant - // already. + Changed |= preprocessGEP(GEP); + + if (!CheckProfitability) { + for (const auto &GEPInfoPair : InstructionsToTransform) { + for (const auto &GEPInfo : GEPInfoPair.second) { + Changed |= + splitGEP(GEPInfo.GEPInstruction, GEPInfo.AccumulativeByteOffset); + // No need to split GEP ConstantExprs because all their indices are + // already constant. + } + } + } else { + // Since one instruction can be optimized relative to different bases, + // choose the base that has the best chance of decreasing register + // pressure. Sort all found bases in decreasing order of that effect. + SmallVector, unsigned>> + SortedInstructionsList; + for (const auto &GEPInfoPair : InstructionsToTransform) { + unsigned DeadValuesNumber = count_if(GEPInfoPair.second, OnlyUsedInGEP); + if (DeadValuesNumber > 0) { + SortedInstructionsList.emplace_back(GEPInfoPair.second, + DeadValuesNumber); + } + } + sort(SortedInstructionsList, [](const auto &LHS, const auto &RHS) { + if (LHS.second != RHS.second) + return LHS.second > RHS.second; + return LHS.first.size() > RHS.first.size(); + }); + + // Optimize all chosen GEPs. + for (unsigned I = 0; I < SortedInstructionsList.size(); I++) { + auto DetailedInfoList = SortedInstructionsList[I].first; + if (DetailedInfoList.size() > 1 && + any_of(DetailedInfoList, OnlyUsedInGEP)) { + for (const auto &GEPInfo : DetailedInfoList) { + LLVM_DEBUG(dbgs() << "Try to split GEP " << *GEPInfo.GEPInstruction + << "\n"); + bool CurrentChanged = splitGEP(GEPInfo.GEPInstruction, + GEPInfo.AccumulativeByteOffset); + Changed |= CurrentChanged; + // If the GEP has been optimized, remove it from the lists associated + // with other bases.
+ for (unsigned J = I + 1; + J < SortedInstructionsList.size() && CurrentChanged; J++) { + auto RemoveIt = remove_if(SortedInstructionsList[J].first, + [&GEPInfo](const struct GEPInfo &Info) { + return Info.GEPInstruction == + GEPInfo.GEPInstruction; + }); + SortedInstructionsList[J].first.erase( + RemoveIt, SortedInstructionsList[J].first.end()); + } + } + } + } } Changed |= reuniteExts(F); @@ -1378,7 +1548,8 @@ auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & { return AM.getResult<TargetIRAnalysis>(F); }; - SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP); + SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP, + CheckProfitability); if (!Impl.run(F)) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/RISCV/split-gep.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=riscv64-unknown-elf -passes='separate-const-offset-from-gep,early-cse' \ +; RUN: opt < %s -mtriple=riscv64-unknown-elf -passes='separate-const-offset-from-gep<check-profit>,early-cse' \ ; RUN: -S | FileCheck %s ; Several tests for -separate-const-offset-from-gep. The transformation ; heavily relies on TargetTransformInfo, so we put these tests under ; target-specific folders. +; Positive test ; Simple case when GEPs should be optimized. define i64 @test1(i64* %array, i64 %i, i64 %j) { ; CHECK-LABEL: @test1( @@ -33,6 +34,7 @@ ret i64 undef } +; Positive test ; Optimize GEPs when there sext instructions are needed to cast index value to expected type. define i32 @test2(i32* %array, i32 %i, i32 %j) { ; CHECK-LABEL: @test2( @@ -64,22 +66,24 @@ ret i32 undef } +; Negative test ; No need to modify because all values are also used in other expressions. ; Modification doesn't decrease register pressure.
define i32 @test3(i32* %array, i32 %i) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[I]], 6 -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6 -; CHECK-NEXT: store i32 [[ADD3]], i32* [[GEP54]], align 4 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[ADD3]], i32* [[GEP5]], align 4 ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[I]], 35 -; CHECK-NEXT: [[GEP86:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 35 -; CHECK-NEXT: store i32 [[ADD6]], i32* [[GEP86]], align 4 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[ADD6]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: @@ -98,6 +102,7 @@ ret i32 undef } +; Positive test ; Optimized GEPs for multidimensional array with same base define i32 @test4([50 x i32]* %array2, i32 %i) { ; CHECK-LABEL: @test4( @@ -163,6 +168,7 @@ ret i32 undef } +; Negative test ; No need to optimize GEPs, because there is critical amount with non-constant offsets. define i64 @test6(i64* %array, i64 %i, i64 %j) { ; CHECK-LABEL: @test6( @@ -170,10 +176,11 @@ ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[I:%.*]], 5 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, i64* [[ARRAY:%.*]], i64 [[J:%.*]] ; CHECK-NEXT: store i64 [[ADD]], i64* [[GEP]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, i64* [[ARRAY]], i64 [[I]] -; CHECK-NEXT: [[GEP52:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 6 -; CHECK-NEXT: store i64 [[I]], i64* [[GEP52]], align 4 -; CHECK-NEXT: store i64 [[I]], i64* [[TMP0]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i64 [[I]], 6 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i64, i64* [[ARRAY]], i64 [[ADD3]] +; CHECK-NEXT: store i64 [[I]], i64* [[GEP5]], align 4 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i64, i64* [[ARRAY]], i64 [[I]] +; CHECK-NEXT: store i64 [[I]], i64* [[GEP8]], align 4 ; CHECK-NEXT: ret i64 undef ; entry: @@ -189,23 +196,23 @@ ret i64 undef } +; Negative test ; No need to optimize GEPs, because the base variable is different. 
define i32 @test7(i32* %array, i32 %i, i32 %j, i32 %k) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[K:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[ARRAY]], i64 [[TMP2]] -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 6 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP54]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[J:%.*]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[ARRAY]], i64 [[TMP4]] -; CHECK-NEXT: [[GEP86:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 35 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP86]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[K:%.*]], 6 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP5]], align 4 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[J:%.*]], 35 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i32, i32* [[ARRAY]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: @@ -224,21 +231,23 @@ ret i32 undef } +; Negative test ; No need to optimize GEPs, because the base of GEP instructions is different. 
define i32 @test8(i32* %array, i32* %array2, i32* %array3, i32 %i) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ARRAY2:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 6 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP54]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[ARRAY3:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP86:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 35 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP86]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[I]], 6 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, i32* [[ARRAY2:%.*]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP5]], align 4 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[I]], 35 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds i32, i32* [[ARRAY3:%.*]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: @@ -257,20 +266,24 @@ ret i32 undef } +; Negative test ; No need to optimize GEPs of multidimensional array, because the base of GEP instructions is different. define i32 @test9([50 x i32]* %array, i32 %i) { ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARRAY:%.*]], i64 0, i64 [[TMP0]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [50 x i32], [50 x i32]* [[ARRAY]], i64 [[TMP0]], i64 [[TMP0]] -; CHECK-NEXT: [[GEP54:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 6 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP54]], align 4 -; CHECK-NEXT: [[GEP87:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 335 -; CHECK-NEXT: store i32 [[I]], i32* [[GEP87]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARRAY:%.*]], i64 0, i64 [[SEXT]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[I]], 6 +; CHECK-NEXT: [[SEXT4:%.*]] = sext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[INT:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARRAY]], i64 [[INT]], i64 [[SEXT4]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP5]], align 4 +; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[I]], 35 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ADD6]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* [[ARRAY]], i64 [[SEXT4]], i64 [[SEXT7]] +; CHECK-NEXT: store i32 [[I]], i32* [[GEP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: