Index: lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
===================================================================
--- lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -165,6 +165,10 @@
   void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
   /// Finds the first use of Used in U. Returns -1 if not found.
   static unsigned FindFirstUse(User *U, Value *Used);
+  /// Returns whether OPC (sext or zext) can be distributed to the operands of
+  /// BO. e.g., sext can be distributed to the operands of an "add nsw" because
+  /// sext (add nsw a, b) == add nsw (sext a), (sext b).
+  static bool Distributable(unsigned OPC, BinaryOperator *BO);
 
   /// The path from the constant offset to the old GEP index. e.g., if the GEP
   /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
@@ -223,6 +227,25 @@
   return new SeparateConstOffsetFromGEP();
 }
 
+bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) {
+  assert(OPC == Instruction::SExt || OPC == Instruction::ZExt);
+
+  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
+  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
+  if (BO->getOpcode() == Instruction::Add ||
+      BO->getOpcode() == Instruction::Sub) {
+    return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) ||
+           (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap());
+  }
+
+  // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B)
+  // -instcombine also leverages this invariant to do the reverse
+  // transformation to reduce integer casts.
+  return BO->getOpcode() == Instruction::And ||
+         BO->getOpcode() == Instruction::Or ||
+         BO->getOpcode() == Instruction::Xor;
+}
+
 int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) {
   assert(U->getNumOperands() == 2);
   int64_t ConstantOffset = find(U->getOperand(0));
@@ -273,21 +296,14 @@
       ConstantOffset = findInEitherOperand(U, false);
       break;
     }
-    case Instruction::SExt: {
-      // For safety, we trace into sext only when its operand is marked
-      // "nsw" because xxx.nsw guarantees no signed wrap. e.g., we can safely
-      // transform "sext (add nsw a, 5)" into "add nsw (sext a), 5".
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) {
-        if (BO->hasNoSignedWrap())
-          ConstantOffset = find(U->getOperand(0));
-      }
-      break;
-    }
+    case Instruction::SExt:
     case Instruction::ZExt: {
-      // Similarly, we trace into zext only when its operand is marked with
-      // "nuw" because zext (add nuw a, b) == add nuw (zext a), (zext b).
+      // We trace into sext/zext if the operator can be distributed to its
+      // operand. e.g., we can trace into "sext (add nsw a, 5)" and
+      // extract constant 5, because
+      //   sext (add nsw a, 5) == add nsw (sext a), 5
       if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) {
-        if (BO->hasNoUnsignedWrap())
+        if (Distributable(O->getOpcode(), BO))
           ConstantOffset = find(U->getOperand(0));
       }
       break;
Index: test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
===================================================================
--- test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -58,6 +58,25 @@
 ; CHECK: [[BASE_INT:%[0-9]+]] = ptrtoint float* [[BASE_PTR]] to i64
 ; CHECK: add i64 [[BASE_INT]], 132
 
+; Similar to @ext_add_no_overflow, we should be able to trace into sext/zext if
+; its operand is an "or" instruction.
+define float* @ext_or(i64 %a, i32 %b) {
+entry:
+  %b1 = shl i32 %b, 2
+  %b2 = or i32 %b1, 1
+  %b3 = or i32 %b1, 2
+  %b2.ext = sext i32 %b2 to i64
+  %b3.ext = sext i32 %b3 to i64
+  %i = add i64 %a, %b2.ext
+  %j = add i64 %a, %b3.ext
+  %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
+  ret float* %p
+}
+; CHECK-LABEL: @ext_or
+; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
+; CHECK: [[BASE_INT:%[0-9]+]] = ptrtoint float* [[BASE_PTR]] to i64
+; CHECK: add i64 [[BASE_INT]], 136
+
 ; We should treat "or" with no common bits (%k) as "add", and leave "or" with
 ; potentially common bits (%l) as is.
 define float* @or(i64 %i) {
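For review context, a minimal worked sketch of why @ext_or folds to 136 (illustrative only, not part of the patch; it just restates the distribution rule Distributable checks and the offsets already present in the test):

; Distributable() allows pushing sext through the "or":
;   sext i32 (or i32 %b1, 1) to i64  ==  or i64 (sext i32 %b1 to i64), 1
; and since %b1 = shl i32 %b, 2 keeps the low two bits zero, the pass already
; treats "or %b1, 1" (and "or %b1, 2") as an "add", so the constants 1 and 2
; surface as GEP indices. With [32 x [32 x float]] rows of 32 * 4 = 128 bytes
; and 4-byte floats, the folded byte offset is 1 * 128 + 2 * 4 = 136, which is
; what "CHECK: add i64 [[BASE_INT]], 136" expects.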