Index: lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
===================================================================
--- lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -118,18 +118,17 @@
 class ConstantOffsetExtractor {
  public:
   /// Extracts a constant offset from the given GEP index. It outputs the
-  /// numeric value of the extracted constant offset (0 if failed), and a
   /// new index representing the remainder (equal to the original index minus
   /// the constant offset).
   /// \p Idx    The given GEP index
-  /// \p NewIdx The new index to replace (output)
   /// \p DL     The datalayout of the module
   /// \p GEP    The given GEP
-  static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
-                         GetElementPtrInst *GEP);
+  static Value *Extract(Value *Idx, const DataLayout *DL,
+                        GetElementPtrInst *GEP);
   /// Looks for a constant offset without extracting it. The meaning of the
   /// arguments and the return value are the same as Extract.
-  static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
+  static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP,
+                      bool &FoundConst);
 
  private:
   ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
@@ -148,10 +147,12 @@
   ///                 an index of an inbounds GEP is guaranteed to be
   ///                 non-negative. Levaraging this, we can better split
   ///                 inbounds GEPs.
-  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
+  /// \p FoundConst   Whether V constains constants.
+  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative,
+             bool &FoundConst);
   /// A helper function to look into both operands of a binary operator.
-  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
-                            bool ZeroExtended);
+  APInt findInOperands(BinaryOperator *BO, bool SignExtended, bool ZeroExtended,
+                       bool &FoundConst);
   /// After finding the constant offset C from the GEP index I, we build a new
   /// index I' s.t. I' + C = I. This function builds and returns the new
   /// index I' according to UserChain produced by function "find".
@@ -182,10 +183,10 @@
   ///
   /// \p ChainIndex The index to UserChain. ChainIndex is initially
   ///               UserChain.size() - 1, and is decremented during
-  ///               the recursion.
-  Value *distributeExtsAndCloneChain(unsigned ChainIndex);
+  ///               the recursion. The next index to visit is ChainIndex - 1.
+  Value *distributeExtsAndCloneChain(unsigned &ChainIndex);
   /// Reassociates the GEP index to the form I' + C and returns I'.
-  Value *removeConstOffset(unsigned ChainIndex);
+  Value *removeConstOffset(unsigned &ChainIndex);
   /// A helper function to apply ExtInsts, a list of s/zext, to value V.
   /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
   /// returns "sext i32 (zext i16 V to i32) to i64".
@@ -207,12 +208,24 @@
   bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                     bool NonNegative);
 
-  /// The path from the constant offset to the old GEP index. e.g., if the GEP
-  /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
-  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
-  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
-  ///
-  /// This path helps to rebuild the new GEP index.
+  /// UserChain records the visit path from constant offsets to the old GEP
+  /// index. e.g. if the GEP index is calculated as following:
+  ///                  b = a + 3
+  ///                  c = b + 4;
+  ///                  e = d + 8;
+  ///                  f = c + e;
+  /// And such index is actually a tree:
+  ///                    f = c + e
+  ///                      /     \
+  ///               c = b + 4   e = d + 8
+  ///                    /   \     \
+  ///             b = a + 3   4     8
+  ///                  /
+  ///                 3
+  /// The traversal is DFS(Depth-Fist Search) and stored in post-order, so such
+  /// nodes are kept in UserChain as following:
+  ///     3, b = a + 3, 4, c = b + 4, 8, e = d + 8, f = c + e;
+  /// This sequence is important when we try to remove constants.
   SmallVector<User *, 8> UserChain;
   /// A data structure used in rebuildWithoutConstOffset. Contains all
   /// sext/zext instructions along UserChain.
@@ -355,30 +368,23 @@
   return true;
 }
 
-APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
-                                                   bool SignExtended,
-                                                   bool ZeroExtended) {
+APInt ConstantOffsetExtractor::findInOperands(BinaryOperator *BO,
+                                              bool SignExtended,
+                                              bool ZeroExtended,
+                                              bool &FoundConst) {
   // BO being non-negative does not shed light on whether its operands are
   // non-negative. Clear the NonNegative flag here.
-  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
-                              /* NonNegative */ false);
-  // If we found a constant offset in the left operand, stop and return that.
-  // This shortcut might cause us to miss opportunities of combining the
-  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
-  // However, such cases are probably already handled by -instcombine,
-  // given this pass runs after the standard optimizations.
-  if (ConstantOffset != 0) return ConstantOffset;
-  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
-                        /* NonNegative */ false);
-  // If U is a sub operator, negate the constant offset found in the right
-  // operand.
-  if (BO->getOpcode() == Instruction::Sub)
-    ConstantOffset = -ConstantOffset;
-  return ConstantOffset;
+  APInt Constant0 = find(BO->getOperand(0), SignExtended, ZeroExtended,
+                         /* NonNegative */ false, FoundConst);
+  APInt Constant1 = find(BO->getOperand(1), SignExtended, ZeroExtended,
+                         /* NonNegative */ false, FoundConst);
+  bool IsSub = (BO->getOpcode() == Instruction::Sub);
+  return IsSub ? Constant0 - Constant1 : Constant0 + Constant1;
 }
 
 APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
-                                    bool ZeroExtended, bool NonNegative) {
+                                    bool ZeroExtended, bool NonNegative,
+                                    bool &FoundConst) {
   // TODO(jingyue): We could trace into integer/pointer casts, such as
   // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
   // integers because it gives good enough results for our benchmarks.
@@ -389,32 +395,38 @@
   if (U == nullptr) return APInt(BitWidth, 0);
 
   APInt ConstantOffset(BitWidth, 0);
+  bool FindInOperands = false;
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     // Hooray, we found it!
     ConstantOffset = CI->getValue();
+    // Zero is a valid constant offset, but doesn't help this optimization.
+    FindInOperands = (ConstantOffset != 0);
   } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
     // Trace into subexpressions for more hoisting opportunities.
     if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) {
-      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+      ConstantOffset =
+          findInOperands(BO, SignExtended, ZeroExtended, FindInOperands);
     }
   } else if (isa<SExtInst>(V)) {
     ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
-                          ZeroExtended, NonNegative).sext(BitWidth);
+                          ZeroExtended, NonNegative,
+                          FindInOperands).sext(BitWidth);
   } else if (isa<ZExtInst>(V)) {
     // As an optimization, we can clear the SignExtended flag because
     // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
     //
     // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
-    ConstantOffset =
-        find(U->getOperand(0), /* SignExtended */ false,
-             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
+    ConstantOffset = find(U->getOperand(0), /* SignExtended */ false,
+                          /* ZeroExtended */ true, /* NonNegative */ false,
+                          FindInOperands).zext(BitWidth);
   }
 
-  // If we found a non-zero constant offset, add it to the path for
-  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
-  // help this optimization.
-  if (ConstantOffset != 0)
+  // If we found constant offset in current Value, add it to the path for
+  // rebuildWithoutConstOffset.
+  if (FindInOperands) {
     UserChain.push_back(U);
+    FoundConst = true;
+  }
   return ConstantOffset;
 }
 
@@ -438,7 +450,11 @@
 }
 
 Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
-  distributeExtsAndCloneChain(UserChain.size() - 1);
+  assert(UserChain.size() > 0 && "Empty Chain");
+
+  unsigned ChainIndex = UserChain.size() - 1;
+  distributeExtsAndCloneChain(ChainIndex);
+  assert(ChainIndex == 0 && "Make sure that all Users have been visited");
   // Remove all nullptrs (used to be s/zext) from UserChain.
   unsigned NewSize = 0;
   for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
@@ -448,63 +464,70 @@
     }
   }
   UserChain.resize(NewSize);
-  return removeConstOffset(UserChain.size() - 1);
+  ChainIndex = NewSize - 1;
+  // Remove all the constant offsets and generate a new index.
+  Value *NewIdx = removeConstOffset(ChainIndex);
+  assert(ChainIndex == 0 && "Make sure that all Users have been visited");
+  return NewIdx;
 }
 
 Value *
-ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
+ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned &ChainIndex) {
   User *U = UserChain[ChainIndex];
-  if (ChainIndex == 0) {
-    assert(isa<ConstantInt>(U));
-    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
+  // If U is a ConstantInt, applyExts will return a ConstantInt as well.
+  if (isa<ConstantInt>(U))
     return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
-  }
 
+  assert(ChainIndex != 0 &&
+         "Only Constant is allowed the last one in the Chain");
+  // Store current index in case that ChainIndex may be decremented.
+  unsigned ThisIndex = ChainIndex;
   if (CastInst *Cast = dyn_cast<CastInst>(U)) {
     assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
            "We only traced into two types of CastInst: sext and zext");
     ExtInsts.push_back(Cast);
     UserChain[ChainIndex] = nullptr;
-    return distributeExtsAndCloneChain(ChainIndex - 1);
+    Value *NewV = distributeExtsAndCloneChain(--ChainIndex);
+    // Pop back as this CastInst is only used when visiting it's operand.
+    ExtInsts.pop_back();
+    return NewV;
   }
 
   // Function find only trace into BinaryOperator and CastInst.
   BinaryOperator *BO = cast<BinaryOperator>(U);
-  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
-  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
-  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
-  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
-
-  BinaryOperator *NewBO = nullptr;
-  if (OpNo == 0) {
-    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
-                                   BO->getName(), IP);
+
+  Value *NewOp0, *NewOp1;
+  // At least one of the two operands are the next in Chain. Firstly check if
+  // the next to be visited is operand 1.
+  // If ture, the next to be visited may be operand 0 (depend on whether
+  // operand 0 is equal to the next in chain).
+  // If false, the next to be visited must be operand 0.
+  if (BO->getOperand(1) == UserChain[ChainIndex - 1]) {
+    NewOp1 = distributeExtsAndCloneChain(--ChainIndex);
+
+    if (ChainIndex != 0 && BO->getOperand(0) == UserChain[ChainIndex - 1])
+      NewOp0 = distributeExtsAndCloneChain(--ChainIndex);
+    else
+      NewOp0 = applyExts(BO->getOperand(0));
   } else {
-    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
-                                   BO->getName(), IP);
+    assert(BO->getOperand(0) == UserChain[ChainIndex - 1] &&
+           "At least one operand is next in Chain");
+    NewOp1 = applyExts(BO->getOperand(1));
+    NewOp0 = distributeExtsAndCloneChain(--ChainIndex);
   }
-  return UserChain[ChainIndex] = NewBO;
+
+  BinaryOperator *NewBO = BinaryOperator::Create(BO->getOpcode(), NewOp0,
+                                                 NewOp1, BO->getName(), IP);
+  return UserChain[ThisIndex] = NewBO;
 }
 
-Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
-  if (ChainIndex == 0) {
-    assert(isa<ConstantInt>(UserChain[ChainIndex]));
+Value *ConstantOffsetExtractor::removeConstOffset(unsigned &ChainIndex) {
+  if (isa<ConstantInt>(UserChain[ChainIndex]))
     return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
-  }
 
+  assert(ChainIndex != 0 &&
+         "Only Constant is allowed to be the last one in the Chain");
   BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
-  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
-  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
-  Value *NextInChain = removeConstOffset(ChainIndex - 1);
-  Value *TheOther = BO->getOperand(1 - OpNo);
-
-  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
-  // sub-expression to be just TheOther.
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
-    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
-      return TheOther;
-  }
-
   if (BO->getOpcode() == Instruction::Or) {
     // Rebuild "or" as "add", because "or" may be invalid for the new
     // epxression.
@@ -519,45 +542,75 @@
     //
     // Replacing the "or" with "add" is fine, because
     //   a | (b + 5) = a + (b + 5) = (a + b) + 5
-    return BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
-                                     BO->getName(), IP);
+    BO = BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
+                                   BO->getName(), BO);
   }
 
-  // We can reuse BO in this case, because the new expression shares the same
-  // instruction type and BO is used at most once.
-  assert(BO->getNumUses() <= 1 &&
-         "distributeExtsAndCloneChain clones each BinaryOperator in "
-         "UserChain, so no one should be used more than "
-         "once");
-  BO->setOperand(OpNo, NextInChain);
+  // Similar to distributeExtsAndCloneChain. Firstly check if the next to be
+  // visited is operand 1.
+  // If ture, the next to be visited may be operand 0 (depend on whether
+  // operand 0 is equal to the next in chain).
+  // If false, the next to be visited must be operand 0.
+  Value *NewOp0 = BO->getOperand(0), *NewOp1 = BO->getOperand(1);
+  if (BO->getOperand(1) == UserChain[ChainIndex - 1]) {
+    NewOp1 = removeConstOffset(--ChainIndex);
+    BO->setOperand(1, NewOp1);
+
+    if (ChainIndex != 0 && BO->getOperand(0) == UserChain[ChainIndex - 1]) {
+      NewOp0 = removeConstOffset(--ChainIndex);
+      BO->setOperand(0, NewOp0);
+    }
+  } else {
+    assert(BO->getOperand(0) == UserChain[ChainIndex - 1] &&
+           "At least one operand is next in Chain");
+    NewOp0 = removeConstOffset(--ChainIndex);
+    BO->setOperand(0, NewOp0);
+  }
+
+  // The new operand can be removed if it is constant.
+  bool RemoveOp0 = false, RemoveOp1 = false;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(NewOp1)) {
+    assert(CI->isZero() && "Make sure that all constants have been removed");
+    RemoveOp1 = true;
+  }
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(NewOp0)) {
+    assert(CI->isZero() && "Make sure that all constants have been removed");
+    // Can not remove constant 0 from (0 - b)
+    if (BO->getOpcode() != Instruction::Sub || RemoveOp1) {
+      RemoveOp0 = true;
+    }
+  }
+
+  if (RemoveOp0 && RemoveOp1)
+    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
+  if (RemoveOp0 || RemoveOp1)
+    return RemoveOp0 ? NewOp1 : NewOp0;
+
   BO->setHasNoSignedWrap(false);
   BO->setHasNoUnsignedWrap(false);
-  // Make sure it appears after all instructions we've inserted so far.
-  BO->moveBefore(IP);
   return BO;
 }
 
-int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
-                                         const DataLayout *DL,
-                                         GetElementPtrInst *GEP) {
+Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
+                                        GetElementPtrInst *GEP) {
   ConstantOffsetExtractor Extractor(DL, GEP);
-  // Find a non-zero constant offset first.
-  APInt ConstantOffset =
-      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
-                     GEP->isInBounds());
-  if (ConstantOffset != 0) {
+  // Try to find constant offsets.
+  bool FoundConst = false;
+  Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
+                 GEP->isInBounds(), FoundConst);
+  if (FoundConst)
     // Separates the constant offset from the GEP index.
-    NewIdx = Extractor.rebuildWithoutConstOffset();
-  }
-  return ConstantOffset.getSExtValue();
+    return Extractor.rebuildWithoutConstOffset();
+  return nullptr;
 }
 
 int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
-      GetElementPtrInst *GEP) {
+                                      GetElementPtrInst *GEP,
+                                      bool &FoundConst) {
   // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
   return ConstantOffsetExtractor(DL, GEP)
       .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
-            GEP->isInBounds())
+            GEP->isInBounds(), FoundConst)
       .getSExtValue();
 }
 
@@ -605,9 +658,13 @@
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (isa<SequentialType>(*GTI)) {
       // Tries to extract a constant offset from this GEP index.
-      int64_t ConstantOffset =
-          ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
-      if (ConstantOffset != 0) {
+      bool FoundConst = false;
+      int64_t ConstantOffset = ConstantOffsetExtractor::Find(
+          GEP->getOperand(I), DL, GEP, FoundConst);
+      // Use FoundConst to check whether we need extraction. We don't check
+      // whether ConstantOffset is zero, as it can not cover some situations
+      // like (a - 4) + 4.
+      if (FoundConst) {
         NeedsExtraction = true;
         // A GEP may have multiple indices.  We accumulate the extracted
         // constant offset to a byte offset, and later offset the remainder of
@@ -652,15 +709,11 @@
   gep_type_iterator GTI = gep_type_begin(*GEP);
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (isa<SequentialType>(*GTI)) {
-      Value *NewIdx = nullptr;
-      // Tries to extract a constant offset from this GEP index.
-      int64_t ConstantOffset =
-          ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
-      if (ConstantOffset != 0) {
-        assert(NewIdx != nullptr &&
-               "ConstantOffset != 0 implies NewIdx is set");
+      // Tries to extract constant offsets and build new index.
+      Value *NewIdx =
+          ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP);
+      if (NewIdx != nullptr)
         GEP->setOperand(I, NewIdx);
-      }
     }
   }
   // Clear the inbounds attribute because the new index may be off-bound.
@@ -683,6 +736,9 @@
   // TODO(jingyue): do some range analysis to keep as many inbounds as
   // possible. GEPs with inbounds are more friendly to alias analysis.
   GEP->setIsInBounds(false);
+  // No need to create another GEP as the accumulative byte offset is 0.
+  if (AccumulativeByteOffset == 0)
+    return true;
 
   // Offsets the base with the accumulative byte offset.
   //
@@ -715,16 +771,16 @@
   Instruction *NewGEP = GEP->clone();
   NewGEP->insertBefore(GEP);
 
-  uint64_t ElementTypeSizeOfGEP =
-      DL->getTypeAllocSize(GEP->getType()->getElementType());
+  // Cast to int64_t as it used with int64_t.
+  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
+      DL->getTypeAllocSize(GEP->getType()->getElementType()));
   Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
   if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
     // Very likely. As long as %gep is natually aligned, the byte offset we
     // extracted should be a multiple of sizeof(*%gep).
     // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we
     // cast ElementTypeSizeOfGEP to signed.
-    int64_t Index =
-        AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
+    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
     NewGEP = GetElementPtrInst::Create(
         NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
   } else {
Index: test/Transforms/SeparateConstOffsetFromGEP/AArch64/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/SeparateConstOffsetFromGEP/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+
Index: test/Transforms/SeparateConstOffsetFromGEP/AArch64/split-gep.ll
===================================================================
--- /dev/null
+++ test/Transforms/SeparateConstOffsetFromGEP/AArch64/split-gep.ll
@@ -0,0 +1,91 @@
+; RUN: opt < %s -separate-const-offset-from-gep -dce -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "arm64--linux-gnu"
+
+; Fix the bug related to incorrect result by using (int64 % uint64). In the
+; source code, we check (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0).
+; For this case, as the alloca size of %struct1 is 32, AccumulativeByteOffset is
+; (-2 * 32) = -64. And as the alloca size of %struct2 is 24,
+; ElementTypeSizeOfGEP is 24. But the result of such modulo is incorrectly equal
+; to 0.
+%struct3 = type { i64, i32 }
+%struct2 = type { %struct3, i32 }
+%struct1 = type { i64, %struct2 }
+%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+define %struct2* @sign_mod_unsign_bug(%struct0* %ptr, i64 %idx) {
+entry:
+  %arrayidx = add nsw i64 %idx, -2
+  %ptr2 = getelementptr inbounds %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+  ret %struct2* %ptr2
+}
+; CHECK-LABEL: @sign_mod_unsign_bug(
+; CHECK-NOT: add
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
+; CHECK: getelementptr i8* [[PTR1]], i64 -64
+; CHECK: bitcast
+; CHECK-NEXT: ret
+
+; Fix the bug related to or. It will generate incorrect result for this simple
+; test case.
+define float* @test_or_bug(i64 %a, float* %ptr) {
+entry:
+  %shl = shl i64 %a, 2
+  %add = add i64 %shl, 12
+  %or = or i64 %add, 1 ; ((b << 2) + 12) and 1 have no common bits
+  %p = getelementptr float* %ptr, i64 %or
+  ret float* %p
+}
+; CHECK-LABEL: @test_or_bug(
+; CHECK-NOT: add
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr float* %ptr, i64 %shl
+; CHECK: getelementptr float* [[PTR]], i64 13
+; CHECK-NEXT: ret
+
+; Check that (a - 4) + 4 can be optimized correctly.
+define i16* @find_0(i32 %a, i16* %ptr) {
+  %sub = add nsw i32 %a, -4
+  %ext = sext i32 %sub to i64
+  %sum = add nsw i64 %ext, 4
+  %incdec.ptr = getelementptr i16* %ptr, i64 %sum
+  ret i16* %incdec.ptr
+}
+; CHECK-LABEL: @find_0(
+; CHECK-NOT: add
+; CHECK: [[EXT:%[a-zA-Z0-9]+]] = sext
+; CHECK: getelementptr i16* %ptr, i64 [[EXT]]
+; CHECK-NEXT: ret
+
+; Check that ((a - 4) + 4) + 1 can be optimized correctly.
+define i16* @find_1(i32 %a, i16* %ptr) {
+  %sub = add nsw i32 %a, -4
+  %ext = sext i32 %sub to i64
+  %sum = add nsw i64 %ext, 4
+  %add.sum = add i64 %sum, 1
+  %incdec.ptr = getelementptr i16* %ptr, i64 %add.sum
+  ret i16* %incdec.ptr
+}
+; CHECK-LABEL: @find_1(
+; CHECK-NOT: add
+; CHECK: [[EXT:%[a-zA-Z0-9]+]] = sext
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr i16* %ptr, i64 [[EXT]]
+; CHECK: getelementptr i16* [[PTR]], i64 1
+
+; Check a more complex case: ((a + 4) - 8) + (b + 16)
+define i16* @find_other(i32 %a, i32 %b, i16* %ptr) {
+  %add = add nsw i32 %a, 4
+  %addext = sext i32 %add to i64
+  %subadd = add nsw i64 %addext, -8
+  %add2 = add nsw i32 %b, 16
+  %add2ext = sext i32 %add2 to i64
+  %addsum = add nsw i64 %subadd, %add2ext
+  %incdec.ptr = getelementptr i16* %ptr, i64 %addsum
+  ret i16* %incdec.ptr
+}
+; CHECK-LABEL: @find_other(
+; CHECK: sext i32
+; CHECK: sext i32
+; CHECK: [[SUM:%[a-zA-Z0-9]+]] = add
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr i16* %ptr, i64 [[SUM]]
+; CHECK: getelementptr i16* [[PTR]], i64 12