diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -2597,6 +2597,76 @@
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// SignedFloorDivIOp
+//===----------------------------------------------------------------------===//
+
+def SignedFloorDivIOp : IntArithmeticOp<"floordivi_signed"> {
+  let summary = "signed floor integer division operation";
+  let description = [{
+    Syntax:
+
+    ```
+    operation ::= ssa-id `=` `floordivi_signed` ssa-use `,` ssa-use `:` type
+    ```
+
+    Signed integer division. Rounds towards negative infinity, i.e.
+    `7 / -2 = -4`. Treats the leading bit as sign.
+
+    Note: the semantics of division by zero and of signed division overflow
+    (minimum value divided by -1) are TBD; do NOT assume any specific
+    behavior.
+
+    Example:
+
+    ```mlir
+    // Scalar signed integer floor division.
+    %a = floordivi_signed %b, %c : i64
+
+    // SIMD vector element-wise division.
+    %f = floordivi_signed %g, %h : vector<4xi32>
+
+    // Tensor element-wise integer division.
+    %x = floordivi_signed %y, %z : tensor<4x?xi8>
+    ```
+  }];
+  let hasFolder = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// SignedCeilDivIOp
+//===----------------------------------------------------------------------===//
+
+def SignedCeilDivIOp : IntArithmeticOp<"ceildivi_signed"> {
+  let summary = "signed ceil integer division operation";
+  let description = [{
+    Syntax:
+
+    ```
+    operation ::= ssa-id `=` `ceildivi_signed` ssa-use `,` ssa-use `:` type
+    ```
+
+    Signed integer division. Rounds towards positive infinity, i.e.
+    `7 / -2 = -3`. Treats the leading bit as sign.
+
+    Note: the semantics of division by zero and of signed division overflow
+    (minimum value divided by -1) are TBD; do NOT assume any specific
+    behavior.
+
+    Example:
+
+    ```mlir
+    // Scalar signed integer ceil division.
+    %a = ceildivi_signed %b, %c : i64
+
+    // SIMD vector element-wise division.
+    %f = ceildivi_signed %g, %h : vector<4xi32>
+
+    // Tensor element-wise integer division.
+    %x = ceildivi_signed %y, %z : tensor<4x?xi8>
+    ```
+  }];
+  let hasFolder = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // SignedRemIOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
@@ -0,0 +1,82 @@
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// Prints a 40-element i32 memref as a vector via a 1-D transfer read.
+func @transfer_read_1d(%A : memref<40xi32>, %base1 : index) {
+  %i42 = constant -42 : i32
+  %f = vector.transfer_read %A[%base1], %i42
+      {permutation_map = affine_map<(d0) -> (d0)>} :
+    memref<40xi32>, vector<40xi32>
+  vector.print %f : vector<40xi32>
+  return
+}
+
+func @entry() {
+  %c0 = constant 0 : index
+  %c20 = constant 20 : i32
+  %c10 = constant 10 : i32
+  %cmin10 = constant -10 : i32
+  %A = alloc() : memref<40xi32>
+
+  // Print the numerators -20..19.
+  affine.for %i = 0 to 40 {
+    %ii = index_cast %i : index to i32
+    %ii20 = subi %ii, %c20 : i32
+    store %ii20, %A[%i] : memref<40xi32>
+  }
+  call @transfer_read_1d(%A, %c0) : (memref<40xi32>, index) -> ()
+
+  // Test with ceil(*, 10).
+  affine.for %i = 0 to 40 {
+    %ii = index_cast %i : index to i32
+    %ii20 = subi %ii, %c20 : i32
+    %val = ceildivi_signed %ii20, %c10 : i32
+    store %val, %A[%i] : memref<40xi32>
+  }
+  call @transfer_read_1d(%A, %c0) : (memref<40xi32>, index) -> ()
+
+  // Test with floor(*, 10).
+  affine.for %i = 0 to 40 {
+    %ii = index_cast %i : index to i32
+    %ii20 = subi %ii, %c20 : i32
+    %val = floordivi_signed %ii20, %c10 : i32
+    store %val, %A[%i] : memref<40xi32>
+  }
+  call @transfer_read_1d(%A, %c0) : (memref<40xi32>, index) -> ()
+
+  // Test with ceil(*, -10).
+  affine.for %i = 0 to 40 {
+    %ii = index_cast %i : index to i32
+    %ii20 = subi %ii, %c20 : i32
+    %val = ceildivi_signed %ii20, %cmin10 : i32
+    store %val, %A[%i] : memref<40xi32>
+  }
+  call @transfer_read_1d(%A, %c0) : (memref<40xi32>, index) -> ()
+
+  // Test with floor(*, -10).
+  affine.for %i = 0 to 40 {
+    %ii = index_cast %i : index to i32
+    %ii20 = subi %ii, %c20 : i32
+    %val = floordivi_signed %ii20, %cmin10 : i32
+    store %val, %A[%i] : memref<40xi32>
+  }
+  call @transfer_read_1d(%A, %c0) : (memref<40xi32>, index) -> ()
+
+  return
+}
+
+// List below is aligned for easy manual check.
+// Legend: num, ceil(num, 10), floor(num, 10), ceil(num, -10), floor(num, -10)
+// ( -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 )
+// ( -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2 )
+// ( -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+// ( 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+// ( 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2 )
+
+// CHECK:( -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 )
+// CHECK:( -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2 )
+// CHECK:( -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+// CHECK:( 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+// CHECK:( 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2 )
\ No newline at end of file
diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -3276,6 +3276,88 @@
   }
 };
 
+class SignedFloorDivIOpLowering
+    : public ConvertOpToLLVMPattern<SignedFloorDivIOp> {
+public:
+  using ConvertOpToLLVMPattern<SignedFloorDivIOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = op->getLoc();
+    SignedFloorDivIOpAdaptor adaptor(operands);
+    auto signedFloorDivIOp = cast<SignedFloorDivIOp>(op);
+    auto type = signedFloorDivIOp.getType();
+    auto a = signedFloorDivIOp.lhs();
+    auto b = signedFloorDivIOp.rhs();
+    auto plusOne =
+        rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 1));
+    auto zero =
+        rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 0));
+    auto minusOne =
+        rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, -1));
+    // Compute x = (b<0) ? 1 : -1.
+    auto compare = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, b, zero);
+    auto x = rewriter.create<SelectOp>(loc, compare, plusOne, minusOne);
+    // Compute negative res: -1 - ((x-a)/b).
+    auto xMinusA = rewriter.create<SubIOp>(loc, x, a);
+    auto xMinusADivB = rewriter.create<SignedDivIOp>(loc, xMinusA, b);
+    auto negRes = rewriter.create<SubIOp>(loc, minusOne, xMinusADivB);
+    // Compute positive res: a/b.
+    auto posRes = rewriter.create<SignedDivIOp>(loc, a, b);
+    // Result is (a*b<0) ? negative result : positive result.
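+    // For example, a = -7, b = 2 gives x = -1 and
+    // negRes = -1 - ((-1 + 7) / 2) = -4 = floor(-3.5); a*b = -14 < 0 then
+    // selects negRes below. The std ops built here are in turn lowered to
+    // LLVM by the other patterns of this conversion.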
+    auto aTimesB = rewriter.create<MulIOp>(loc, a, b);
+    auto compareRes =
+        rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, aTimesB, zero);
+    auto res = rewriter.create<SelectOp>(loc, compareRes, negRes, posRes);
+    // Perform substitution and return success.
+    rewriter.replaceOp(op, {res});
+    return success();
+  }
+};
+
+class SignedCeilDivIOpLowering
+    : public ConvertOpToLLVMPattern<SignedCeilDivIOp> {
+public:
+  using ConvertOpToLLVMPattern<SignedCeilDivIOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = op->getLoc();
+    SignedCeilDivIOpAdaptor adaptor(operands);
+    auto signedCeilDivIOp = cast<SignedCeilDivIOp>(op);
+    auto type = signedCeilDivIOp.getType();
+    auto a = signedCeilDivIOp.lhs();
+    auto b = signedCeilDivIOp.rhs();
+    auto plusOne =
+        rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 1));
+    auto zero =
+        rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 0));
+    auto minusOne =
+        rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, -1));
+    // Compute x = (b>0) ? -1 : 1.
+    auto compare = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, b, zero);
+    auto x = rewriter.create<SelectOp>(loc, compare, minusOne, plusOne);
+    // Compute positive res: 1 + ((x+a)/b).
+    auto xPlusA = rewriter.create<AddIOp>(loc, x, a);
+    auto xPlusADivB = rewriter.create<SignedDivIOp>(loc, xPlusA, b);
+    auto posRes = rewriter.create<AddIOp>(loc, plusOne, xPlusADivB);
+    // Compute negative res: - ((-a)/b).
+    auto minusA = rewriter.create<SubIOp>(loc, zero, a);
+    auto minusADivB = rewriter.create<SignedDivIOp>(loc, minusA, b);
+    auto negRes = rewriter.create<SubIOp>(loc, zero, minusADivB);
+    // Result is (a*b>0) ? pos result : neg result.
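+    // For example, a = 7, b = 2 gives x = -1 and
+    // posRes = 1 + ((-1 + 7) / 2) = 4 = ceil(3.5); a*b = 14 > 0 then selects
+    // posRes below. With a = -7, b = 2 the negative branch yields
+    // -(7 / 2) = -3 = ceil(-3.5).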
+    auto aTimesB = rewriter.create<MulIOp>(loc, a, b);
+    auto compareRes =
+        rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, aTimesB, zero);
+    auto res = rewriter.create<SelectOp>(loc, compareRes, posRes, negRes);
+    // Perform substitution and return success.
+    rewriter.replaceOp(op, {res});
+    return success();
+  }
+};
+
 } // namespace
 
 /// Try to match the kind of a std.atomic_rmw to determine whether to use a
@@ -3503,7 +3585,9 @@
       SelectOpLowering,
       ShiftLeftOpLowering,
       SignExtendIOpLowering,
+      SignedCeilDivIOpLowering,
       SignedDivIOpLowering,
+      SignedFloorDivIOpLowering,
       SignedRemIOpLowering,
       SignedShiftRightOpLowering,
       SinOpLowering,
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -2419,6 +2419,113 @@
   return overflowOrDiv0 ? Attribute() : result;
 }
 
+//===----------------------------------------------------------------------===//
+// SignedFloorDivIOp
+//===----------------------------------------------------------------------===//
+
+static APInt SignedCeilNonnegInputs(APInt a, APInt b, bool &overflow) {
+  // Returns (a-1)/b + 1.
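+  // Only valid for a > 0 and b > 0, which all callers guarantee: since sdiv
+  // truncates towards zero, (a-1)/b + 1 equals ceil(a/b) for positive a.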
+  APInt one(a.getBitWidth(), 1, true); // Signed value 1.
+  APInt val = a.ssub_ov(one, overflow).sdiv_ov(b, overflow);
+  return val.sadd_ov(one, overflow);
+}
+
+OpFoldResult SignedFloorDivIOp::fold(ArrayRef<Attribute> operands) {
+  assert(operands.size() == 2 && "binary operation takes two operands");
+
+  // Don't fold if it would overflow or if it requires a division by zero.
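+  // Each sign combination below is reduced to a division of nonnegative
+  // values, using the identities floordiv(a, b) == -ceildiv(-a, b) and
+  // floordiv(a, b) == -ceildiv(a, -b) for the mixed-sign cases.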
+  bool overflowOrDiv0 = false;
+  auto result = constFoldBinaryOp<IntegerAttr>(operands, [&](APInt a, APInt b) {
+    if (overflowOrDiv0 || !b) {
+      overflowOrDiv0 = true;
+      return a;
+    }
+    unsigned bits = a.getBitWidth();
+    APInt zero = APInt::getNullValue(bits);
+    if (a.sge(zero) && b.sgt(zero)) {
+      // Both positive (or a is zero), return a / b.
+      return a.sdiv_ov(b, overflowOrDiv0);
+    } else if (a.sle(zero) && b.slt(zero)) {
+      // Both negative (or a is zero), return -a / -b.
+      APInt posA = zero.ssub_ov(a, overflowOrDiv0);
+      APInt posB = zero.ssub_ov(b, overflowOrDiv0);
+      return posA.sdiv_ov(posB, overflowOrDiv0);
+    } else if (a.slt(zero) && b.sgt(zero)) {
+      // A is negative, b is positive, return - ceil(-a, b).
+      APInt posA = zero.ssub_ov(a, overflowOrDiv0);
+      APInt ceil = SignedCeilNonnegInputs(posA, b, overflowOrDiv0);
+      return zero.ssub_ov(ceil, overflowOrDiv0);
+    } else {
+      // A is positive, b is negative, return - ceil(a, -b).
+      APInt posB = zero.ssub_ov(b, overflowOrDiv0);
+      APInt ceil = SignedCeilNonnegInputs(a, posB, overflowOrDiv0);
+      return zero.ssub_ov(ceil, overflowOrDiv0);
+    }
+  });
+
+  // Fold out floor division by one. Assumes all tensors of all ones are
+  // splats.
+  if (auto rhs = operands[1].dyn_cast_or_null<IntegerAttr>()) {
+    if (rhs.getValue() == 1)
+      return lhs();
+  } else if (auto rhs = operands[1].dyn_cast_or_null<SplatElementsAttr>()) {
+    if (rhs.getSplatValue<IntegerAttr>().getValue() == 1)
+      return lhs();
+  }
+
+  return overflowOrDiv0 ? Attribute() : result;
+}
+
+//===----------------------------------------------------------------------===//
+// SignedCeilDivIOp
+//===----------------------------------------------------------------------===//
+
+OpFoldResult SignedCeilDivIOp::fold(ArrayRef<Attribute> operands) {
+  assert(operands.size() == 2 && "binary operation takes two operands");
+
+  // Don't fold if it would overflow or if it requires a division by zero.
+  bool overflowOrDiv0 = false;
+  auto result = constFoldBinaryOp<IntegerAttr>(operands, [&](APInt a, APInt b) {
+    if (overflowOrDiv0 || !b) {
+      overflowOrDiv0 = true;
+      return a;
+    }
+    unsigned bits = a.getBitWidth();
+    APInt zero = APInt::getNullValue(bits);
+    if (a.sgt(zero) && b.sgt(zero)) {
+      // Both positive, return ceil(a, b).
+      return SignedCeilNonnegInputs(a, b, overflowOrDiv0);
+    } else if (a.slt(zero) && b.slt(zero)) {
+      // Both negative, return ceil(-a, -b).
+      APInt posA = zero.ssub_ov(a, overflowOrDiv0);
+      APInt posB = zero.ssub_ov(b, overflowOrDiv0);
+      return SignedCeilNonnegInputs(posA, posB, overflowOrDiv0);
+    } else if (a.slt(zero) && b.sgt(zero)) {
+      // A is negative, b is positive, return - (-a / b).
+      APInt posA = zero.ssub_ov(a, overflowOrDiv0);
+      APInt div = posA.sdiv_ov(b, overflowOrDiv0);
+      return zero.ssub_ov(div, overflowOrDiv0);
+    } else {
+      // A is positive (or zero), b is negative, return - (a / -b).
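+      // (a == 0 with b > 0 also reaches this branch; the division below
+      // then yields 0, so the result is still the correct 0.)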
+      APInt posB = zero.ssub_ov(b, overflowOrDiv0);
+      APInt div = a.sdiv_ov(posB, overflowOrDiv0);
+      return zero.ssub_ov(div, overflowOrDiv0);
+    }
+  });
+
+  // Fold out ceil division by one. Assumes all tensors of all ones are
+  // splats.
+  if (auto rhs = operands[1].dyn_cast_or_null<IntegerAttr>()) {
+    if (rhs.getValue() == 1)
+      return lhs();
+  } else if (auto rhs = operands[1].dyn_cast_or_null<SplatElementsAttr>()) {
+    if (rhs.getSplatValue<IntegerAttr>().getValue() == 1)
+      return lhs();
+  }
+
+  return overflowOrDiv0 ? Attribute() : result;
+}
+
 //===----------------------------------------------------------------------===//
 // SignedRemIOp
 //===----------------------------------------------------------------------===//
@@ -2481,7 +2588,8 @@
   assert(shapedType.getElementType() == constOperand.getType() &&
          "incorrect input attribute type for folding");
 
-  // SplatElementsAttr::get treats single value for second arg as being a splat.
+  // SplatElementsAttr::get treats single value for second arg as being a
+  // splat.
   return SplatElementsAttr::get(shapedType, {constOperand});
 }
 
@@ -3088,12 +3196,11 @@
 }
 
 namespace {
-
-/// Take a list of `values` with potential new constant to extract and a list
-/// of `constantValues` with `values.size()` sentinels that evaluate to true by
-/// applying `isDynamic`.
-/// Detects the `values` produced by a ConstantIndexOp and places the new
-/// constant in place of the corresponding sentinel value.
+/// Take a list of `values` with potential new constant to extract and a
+/// list of `constantValues` with `values.size()` sentinels that evaluate to
+/// true by applying `isDynamic`. Detects the `values` produced by a
+/// ConstantIndexOp and places the new constant in place of the
+/// corresponding sentinel value.
 void canonicalizeSubViewPart(SmallVectorImpl<Value> &values,
                              SmallVectorImpl<int64_t> &constantValues,
                              llvm::function_ref<bool(int64_t)> isDynamic) {
@@ -3145,7 +3252,8 @@
     return failure();
 
   // At least one of offsets/sizes/strides is a new constant.
-  // Form the new list of operands and constant attributes from the existing.
+  // Form the new list of operands and constant attributes from the
+  // existing.
   SmallVector<Value, 8> newOffsets(op.offsets());
   SmallVector<int64_t, 8> newStaticOffsets =
       extractFromI64ArrayAttr(op.static_offsets());
@@ -3342,7 +3450,8 @@
   LogicalResult matchAndRewrite(SubViewOp subViewOp,
                                 PatternRewriter &rewriter) const override {
-    // Any constant operand, just return to let SubViewOpConstantFolder kick in.
+    // Any constant operand, just return to let SubViewOpConstantFolder
+    // kick in.
     if (llvm::any_of(subViewOp.getOperands(), [](Value operand) {
           return matchPattern(operand, m_ConstantIndex());
         }))
@@ -3355,9 +3464,10 @@
     if (!canFoldIntoConsumerOp(castOp))
       return failure();
 
-    /// Deduce the resultType of the SubViewOp using `inferSubViewResultType` on
-    /// the cast source operand type and the SubViewOp static information. This
-    /// is the resulting type if the MemRefCastOp were folded.
+    /// Deduce the resultType of the SubViewOp using
+    /// `inferSubViewResultType` on the cast source operand type and the
+    /// SubViewOp static information. This is the resulting type if the
+    /// MemRefCastOp were folded.
     Type resultType = SubViewOp::inferResultType(
         castOp.source().getType().cast<MemRefType>(),
         extractFromI64ArrayAttr(subViewOp.static_offsets()),
@@ -3597,7 +3707,6 @@
 }
 
 namespace {
-
 /// Replaces chains of two tensor_cast operations by a single tensor_cast
 /// operation if doing so does not remove runtime constraints.
 struct ChainedTensorCast : public OpRewritePattern<TensorCastOp> {
@@ -3616,18 +3725,19 @@
     auto intermediateType = tensorCastOperand.getType().cast<TensorType>();
     auto resultType = tensorCast.getType().cast<TensorType>();
 
-    // We can remove the intermediate cast if joining all three produces the
-    // same result as just joining the source and result shapes.
+    // We can remove the intermediate cast if joining all three produces
+    // the same result as just joining the source and result shapes.
     auto firstJoin =
         joinShapes(joinShapes(sourceType, intermediateType), resultType);
 
-    // The join might not exist if the cast sequence would fail at runtime.
+    // The join might not exist if the cast sequence would fail at
+    // runtime.
     if (!firstJoin)
       return failure();
 
-    // The newJoin always exists if the above join exists, it might just contain
-    // less information. If so, we cannot drop the intermediate cast, as doing
-    // so would remove runtime checks.
+    // The newJoin always exists if the above join exists, it might just
+    // contain less information. If so, we cannot drop the intermediate
+    // cast, as doing so would remove runtime checks.
     auto newJoin = joinShapes(sourceType, resultType);
     if (firstJoin != newJoin)
       return failure();
@@ -3915,7 +4025,6 @@
 Value ViewOp::getViewSource() { return source(); }
 
 namespace {
-
 struct ViewOpShapeFolder : public OpRewritePattern<ViewOp> {
   using OpRewritePattern<ViewOp>::OpRewritePattern;
 
@@ -3959,7 +4068,8 @@
         // Dynamic shape dimension will be folded.
         newShapeConstants.push_back(constantIndexOp.getValue());
       } else {
-        // Dynamic shape dimension not folded; copy operand from old memref.
+        // Dynamic shape dimension not folded; copy operand from old
+        // memref.
         newShapeConstants.push_back(dimSize);
         newOperands.push_back(viewOp.sizes()[dynamicDimPos]);
       }
diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
--- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
@@ -131,3 +131,51 @@
   %0 = transpose %arg0 (i, j, k) -> (k, i, j) : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d0 * s2 + d1)>>
   return
 }
+
+// -----
+
+// Test floor division with signed integers.
+// CHECK-LABEL: func @floordivi
+// CHECK-SAME: ([[ARG0:%[a-zA-Z0-9]+]]: !llvm.i32, [[ARG1:%[a-zA-Z0-9]+]]: !llvm.i32) -> !llvm.i32
+// CHECK: [[ONE:%[a-zA-Z0-9]+]] = llvm.mlir.constant(1 : i32) : !llvm.i32
+// CHECK: [[ZERO:%[a-zA-Z0-9]+]] = llvm.mlir.constant(0 : i32) : !llvm.i32
+// CHECK: [[MINONE:%[a-zA-Z0-9]+]] = llvm.mlir.constant(-1 : i32) : !llvm.i32
+// CHECK: [[CMP1:%[a-zA-Z0-9]+]] = llvm.icmp "slt" [[ARG1]], [[ZERO]] : !llvm.i32
+// CHECK: [[X:%[a-zA-Z0-9]+]] = llvm.select [[CMP1]], [[ONE]], [[MINONE]] : !llvm.i1, !llvm.i32
+// CHECK: [[TRUE1:%[a-zA-Z0-9]+]] = llvm.sub [[X]], [[ARG0]] : !llvm.i32
+// CHECK: [[TRUE2:%[a-zA-Z0-9]+]] = llvm.sdiv [[TRUE1]], [[ARG1]] : !llvm.i32
+// CHECK: [[TRUE3:%[a-zA-Z0-9]+]] = llvm.sub [[MINONE]], [[TRUE2]] : !llvm.i32
+// CHECK: [[FALSE:%[a-zA-Z0-9]+]] = llvm.sdiv [[ARG0]], [[ARG1]] : !llvm.i32
+// CHECK: [[VAL:%[a-zA-Z0-9]+]] = llvm.mul [[ARG0]], [[ARG1]] : !llvm.i32
+// CHECK: [[CMP2:%[a-zA-Z0-9]+]] = llvm.icmp "slt" [[VAL]], [[ZERO]] : !llvm.i32
+// CHECK: [[RES:%[a-zA-Z0-9]+]] = llvm.select [[CMP2]], [[TRUE3]], [[FALSE]] : !llvm.i1, !llvm.i32
+// CHECK: llvm.return [[RES]] : !llvm.i32
+func @floordivi(%arg0: i32, %arg1: i32) -> (i32) {
+  %res = floordivi_signed %arg0, %arg1 : i32
+  return %res : i32
+}
+
+// -----
+
+// Test ceil division with signed integers.
+// CHECK-LABEL: func @ceildivi
+// CHECK-SAME: ([[ARG0:%[a-zA-Z0-9]+]]: !llvm.i32, [[ARG1:%[a-zA-Z0-9]+]]: !llvm.i32) -> !llvm.i32
+// CHECK: [[ONE:%[a-zA-Z0-9]+]] = llvm.mlir.constant(1 : i32) : !llvm.i32
+// CHECK: [[ZERO:%[a-zA-Z0-9]+]] = llvm.mlir.constant(0 : i32) : !llvm.i32
+// CHECK: [[MINONE:%[a-zA-Z0-9]+]] = llvm.mlir.constant(-1 : i32) : !llvm.i32
+// CHECK: [[CMP1:%[a-zA-Z0-9]+]] = llvm.icmp "sgt" [[ARG1]], [[ZERO]] : !llvm.i32
+// CHECK: [[X:%[a-zA-Z0-9]+]] = llvm.select [[CMP1]], [[MINONE]], [[ONE]] : !llvm.i1, !llvm.i32
+// CHECK: [[TRUE1:%[a-zA-Z0-9]+]] = llvm.add [[X]], [[ARG0]] : !llvm.i32
+// CHECK: [[TRUE2:%[a-zA-Z0-9]+]] = llvm.sdiv [[TRUE1]], [[ARG1]] : !llvm.i32
+// CHECK: [[TRUE3:%[a-zA-Z0-9]+]] = llvm.add [[ONE]], [[TRUE2]] : !llvm.i32
+// CHECK: [[FALSE1:%[a-zA-Z0-9]+]] = llvm.sub [[ZERO]], [[ARG0]] : !llvm.i32
+// CHECK: [[FALSE2:%[a-zA-Z0-9]+]] = llvm.sdiv [[FALSE1]], [[ARG1]] : !llvm.i32
+// CHECK: [[FALSE3:%[a-zA-Z0-9]+]] = llvm.sub [[ZERO]], [[FALSE2]] : !llvm.i32
+// CHECK: [[VAL:%[a-zA-Z0-9]+]] = llvm.mul [[ARG0]], [[ARG1]] : !llvm.i32
+// CHECK: [[CMP2:%[a-zA-Z0-9]+]] = llvm.icmp "sgt" [[VAL]], [[ZERO]] : !llvm.i32
+// CHECK: [[RES:%[a-zA-Z0-9]+]] = llvm.select [[CMP2]], [[TRUE3]], [[FALSE3]] : !llvm.i1, !llvm.i32
+// CHECK: llvm.return [[RES]] : !llvm.i32
+func @ceildivi(%arg0: i32, %arg1: i32) -> (i32) {
+  %res = ceildivi_signed %arg0, %arg1 : i32
+  return %res : i32
+}
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir
--- a/mlir/test/IR/core-ops.mlir
+++ b/mlir/test/IR/core-ops.mlir
@@ -569,6 +569,30 @@
   // CHECK: %{{[0-9]+}} = floorf %arg0 : tensor<4x4x?xf32>
   %166 = floorf %t : tensor<4x4x?xf32>
 
+  // CHECK: %{{[0-9]+}} = floordivi_signed %arg2, %arg2 : i32
+  %167 = floordivi_signed %i, %i : i32
+
+  // CHECK: %{{[0-9]+}} = floordivi_signed %arg3, %arg3 : index
+  %168 = floordivi_signed %idx, %idx : index
+
+  // CHECK: %{{[0-9]+}} = floordivi_signed %cst_5, %cst_5 : vector<42xi32>
+  %169 = floordivi_signed %vci32, %vci32 : vector<42 x i32>
+
+  // CHECK: %{{[0-9]+}} = floordivi_signed %cst_4, %cst_4 : tensor<42xi32>
+  %170 = floordivi_signed %tci32, %tci32 : tensor<42 x i32>
+
+  // CHECK: %{{[0-9]+}} = ceildivi_signed %arg2, %arg2 : i32
+  %171 = ceildivi_signed %i, %i : i32
+
+  // CHECK: %{{[0-9]+}} = ceildivi_signed %arg3, %arg3 : index
+  %172 = ceildivi_signed %idx, %idx : index
+
+  // CHECK: %{{[0-9]+}} = ceildivi_signed %cst_5, %cst_5 : vector<42xi32>
+  %173 = ceildivi_signed %vci32, %vci32 : vector<42 x i32>
+
+  // CHECK: %{{[0-9]+}} = ceildivi_signed %cst_4, %cst_4 : tensor<42xi32>
+  %174 = ceildivi_signed %tci32, %tci32 : tensor<42 x i32>
+
   return
 }
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -949,6 +949,46 @@
 
 // -----
 
+// CHECK-LABEL: func @floordivi_signed_by_one
+// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
+func @floordivi_signed_by_one(%arg0: i32) -> (i32) {
+  %c1 = constant 1 : i32
+  %res = floordivi_signed %arg0, %c1 : i32
+  // CHECK: return %[[ARG]]
+  return %res : i32
+}
+
+// CHECK-LABEL: func @tensor_floordivi_signed_by_one
+// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
+func @tensor_floordivi_signed_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
+  %c1 = constant dense<1> : tensor<4x5xi32>
+  %res = floordivi_signed %arg0, %c1 : tensor<4x5xi32>
+  // CHECK: return %[[ARG]]
+  return %res : tensor<4x5xi32>
+}
+
+// -----
+
+// CHECK-LABEL: func @ceildivi_signed_by_one
+// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
+func @ceildivi_signed_by_one(%arg0: i32) -> (i32) {
+  %c1 = constant 1 : i32
+  %res = ceildivi_signed %arg0, %c1 : i32
+  // CHECK: return %[[ARG]]
+  return %res : i32
+}
+
+// CHECK-LABEL: func @tensor_ceildivi_signed_by_one
+// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
+func @tensor_ceildivi_signed_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
+  %c1 = constant dense<1> : tensor<4x5xi32>
+  %res = ceildivi_signed %arg0, %c1 : tensor<4x5xi32>
+  // CHECK: return %[[ARG]]
+  return %res : tensor<4x5xi32>
+}
+
+// -----
+
 // CHECK-LABEL: func @memref_cast_folding_subview
 func @memref_cast_folding_subview(%arg0: memref<4x5xf32>, %i: index) -> (memref<?x?xf32, offset: ?, strides: [?, ?]>) {
   %0 = memref_cast %arg0 : memref<4x5xf32> to memref<?x?xf32>
diff --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir
--- a/mlir/test/Transforms/constant-fold.mlir
+++ b/mlir/test/Transforms/constant-fold.mlir
@@ -402,6 +402,84 @@
 
 // -----
 
+// CHECK-LABEL: func @simple_floordivi_signed
+func @simple_floordivi_signed() -> (i32, i32, i32, i32, i32) {
+  // CHECK-DAG: [[C0:%.+]] = constant 0
+  %z = constant 0 : i32
+  // CHECK-DAG: [[C7:%.+]] = constant 7
+  %0 = constant 7 : i32
+  %1 = constant 2 : i32
+
+  // floor(7, 2) = 3
+  // CHECK-NEXT: [[C3:%.+]] = constant 3 : i32
+  %2 = floordivi_signed %0, %1 : i32
+
+  %3 = constant -2 : i32
+
+  // floor(7, -2) = -4
+  // CHECK-NEXT: [[CM4:%.+]] = constant -4 : i32
+  %4 = floordivi_signed %0, %3 : i32
+
+  %5 = constant -9 : i32
+
+  // floor(-9, 2) = -5
+  // CHECK-NEXT: [[CM5:%.+]] = constant -5 : i32
+  %6 = floordivi_signed %5, %1 : i32
+
+  %7 = constant -13 : i32
+
+  // floor(-13, -2) = 6
+  // CHECK-NEXT: [[C6:%.+]] = constant 6 : i32
+  %8 = floordivi_signed %7, %3 : i32
+
+  // Division by zero is not folded.
+  // CHECK-NEXT: [[XZ:%.+]] = floordivi_signed [[C7]], [[C0]]
+  %9 = floordivi_signed %0, %z : i32
+
+  // CHECK-NEXT: return [[C3]], [[CM4]], [[CM5]], [[C6]], [[XZ]]
+  return %2, %4, %6, %8, %9 : i32, i32, i32, i32, i32
+}
+
+// -----
+
+// CHECK-LABEL: func @simple_ceildivi_signed
+func @simple_ceildivi_signed() -> (i32, i32, i32, i32, i32) {
+  // CHECK-DAG: [[C0:%.+]] = constant 0
+  %z = constant 0 : i32
+  // CHECK-DAG: [[C7:%.+]] = constant 7
+  %0 = constant 7 : i32
+  %1 = constant 2 : i32
+
+  // ceil(7, 2) = 4
+  // CHECK-NEXT: [[C4:%.+]] = constant 4 : i32
+  %2 = ceildivi_signed %0, %1 : i32
+
+  %3 = constant -2 : i32
+
+  // ceil(7, -2) = -3
+  // CHECK-NEXT: [[CM3:%.+]] = constant -3 : i32
+  %4 = ceildivi_signed %0, %3 : i32
+
+  %5 = constant -9 : i32
+
+  // ceil(-9, 2) = -4
+  // CHECK-NEXT: [[CM4:%.+]] = constant -4 : i32
+  %6 = ceildivi_signed %5, %1 : i32
+
+  %7 = constant -15 : i32
+
+  // ceil(-15, -2) = 8
+  // CHECK-NEXT: [[C8:%.+]] = constant 8 : i32
+  %8 = ceildivi_signed %7, %3 : i32
+
+  // Division by zero is not folded.
+  // CHECK-NEXT: [[XZ:%.+]] = ceildivi_signed [[C7]], [[C0]]
+  %9 = ceildivi_signed %0, %z : i32
+
+  // CHECK-NEXT: return [[C4]], [[CM3]], [[CM4]], [[C8]], [[XZ]]
+  return %2, %4, %6, %8, %9 : i32, i32, i32, i32, i32
+}
+
+// -----
+
 // CHECK-LABEL: func @simple_remi_signed
 func @simple_remi_signed(%a : i32) -> (i32, i32, i32) {
   %0 = constant 5 : i32