diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -208,7 +208,10 @@
       "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes)>,
     OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
       "ArrayRef<ArrayRef<AffineExpr>>":$indexingExprs,
-      "ArrayRef<StringRef>":$iteratorTypes)>
+      "ArrayRef<StringRef>":$iteratorTypes)>,
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
+      "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes,
+      "CombiningKind":$kind)>
   ];
   let extraClassDeclaration = [{
     VectorType getLhsType() {
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -501,13 +501,20 @@
                                   Value lhs, Value rhs, Value acc,
                                   ArrayAttr indexingMaps,
                                   ArrayAttr iteratorTypes) {
+  build(builder, result, lhs, rhs, acc, indexingMaps, iteratorTypes,
+        ContractionOp::getDefaultKind());
+}
+
+void vector::ContractionOp::build(OpBuilder &builder, OperationState &result,
+                                  Value lhs, Value rhs, Value acc,
+                                  ArrayAttr indexingMaps,
+                                  ArrayAttr iteratorTypes, CombiningKind kind) {
   result.addOperands({lhs, rhs, acc});
   result.addTypes(acc.getType());
   result.addAttribute(getIndexingMapsAttrName(), indexingMaps);
   result.addAttribute(getIteratorTypesAttrName(), iteratorTypes);
   result.addAttribute(ContractionOp::getKindAttrName(),
-                      CombiningKindAttr::get(ContractionOp::getDefaultKind(),
-                                             builder.getContext()));
+                      CombiningKindAttr::get(kind, builder.getContext()));
 }
 
 static ParseResult parseContractionOp(OpAsmParser &parser,
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
 #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
 #include "mlir/IR/Builders.h"
@@ -220,6 +221,128 @@
   }
 };
 
+// Rewrites a vector.contract whose accumulator has a leading unit dimension
+// into vector.extract ops feeding a vector.contract without that dimension,
+// and broadcasts the result back to the original result type. Operands are
+// transposed first when the dropped dimension is not their leading one.
+struct CastAwayContractionLeadingOneDim
+    : public OpRewritePattern<vector::ContractionOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::ContractionOp contractOp,
+                                PatternRewriter &rewriter) const override {
+    VectorType oldAccType =
+        contractOp.getAccType().dyn_cast_or_null<VectorType>();
+    if (oldAccType == nullptr)
+      return failure();
+    if (oldAccType.getRank() < 2)
+      return failure();
+    // TODO: support masked contractions.
+    if (llvm::size(contractOp.masks()) != 0)
+      return failure();
+    if (oldAccType.getShape()[0] != 1)
+      return failure();
+    int64_t dropDim = 1; // Only one dim is dropped per application, but the
+                         // pattern can be applied greedily to drop more.
+
+    auto oldIndexingMaps = contractOp.getIndexingMaps();
+    SmallVector<AffineMap, 4> newIndexingMaps;
+
+    auto oldIteratorTypes = contractOp.iterator_types();
+    SmallVector<Attribute, 4> newIteratorTypes;
+
+    int64_t dimToDrop = oldIndexingMaps[2].getDimPosition(0);
+
+    if (!isParallelIterator(
+            oldIteratorTypes[dimToDrop])) // Only parallel iterators can be
+                                          // dropped.
+      return failure();
+
+    for (const auto &it : llvm::enumerate(oldIteratorTypes)) {
+      int64_t currDim = it.index();
+      if (currDim == dimToDrop) {
+        continue;
+      }
+      newIteratorTypes.push_back(it.value());
+    }
+
+    SmallVector<Value, 4> operands = {contractOp.lhs(), contractOp.rhs(),
+                                      contractOp.acc()};
+    SmallVector<Value, 4> newOperands;
+
+    for (const auto &it : llvm::enumerate(oldIndexingMaps)) {
+      bool validExtract =
+          false; // Set when the dim to be dropped is the leading dim of this
+                 // operand, in which case vector.extract is used to drop it.
+      SmallVector<AffineExpr, 4> results;
+      auto map = it.value();
+      int64_t orginalZeroDim = it.value().getDimPosition(0);
+      if (orginalZeroDim != dimToDrop) {
+        bool tranposeNeeded =
+            false; // There are two reasons to be on this path: 1. We need to
+                   // transpose the operand to make the dim to be dropped
+                   // leading. 2. The dim to be dropped does not exist, in
+                   // which case we don't want to add a unit transpose, but we
+                   // must check all the indices to confirm this is the case.
+        SmallVector<int64_t> perm;
+        SmallVector<AffineExpr, 4> transposeResults;
+
+        for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
+          int64_t currDim = map.getDimPosition(i);
+          if (currDim == dimToDrop) {
+            tranposeNeeded = true;
+            perm.insert(perm.begin(), i);
+            auto targetExpr = rewriter.getAffineDimExpr(currDim);
+            transposeResults.insert(transposeResults.begin(), targetExpr);
+          } else {
+            perm.push_back(i);
+            auto targetExpr = rewriter.getAffineDimExpr(currDim);
+            transposeResults.push_back(targetExpr);
+          }
+        }
+        if (tranposeNeeded) { // Do the transpose now so that we can drop the
+                              // correct dim using extract later.
+          map = AffineMap::get(map.getNumDims(), 0, transposeResults,
+                               contractOp.getContext());
+          operands[it.index()] = rewriter.create<vector::TransposeOp>(
+              contractOp.getLoc(), operands[it.index()], perm);
+        }
+      }
+      if (map.getDimPosition(0) ==
+          dimToDrop) // We have taken care to make the dim to be dropped
+                     // the leading dim. If it is still not leading, it
+                     // does not exist in this operand and hence we do not need
+                     // an extract.
+        validExtract = true;
+
+      for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
+        int64_t currDim = map.getDimPosition(i);
+        if (currDim == dimToDrop) // This is the dim we are removing.
+          continue;
+        auto targetExpr = rewriter.getAffineDimExpr(
+            currDim < dimToDrop ? currDim : currDim - 1); // Shift later dims.
+        results.push_back(targetExpr);
+      }
+      newIndexingMaps.push_back(AffineMap::get(map.getNumDims() - 1, 0, results,
+                                               contractOp.getContext()));
+      // Extract only when the dropped dim is leading in this operand;
+      // otherwise use the operand without extraction.
+      newOperands.push_back(validExtract
+                                ? rewriter.create<vector::ExtractOp>(
+                                      contractOp.getLoc(), operands[it.index()],
+                                      splatZero(dropDim))
+                                : operands[it.index()]);
+    }
+    auto newContractOp = rewriter.create<vector::ContractionOp>(
+        contractOp.getLoc(), newOperands[0], newOperands[1], newOperands[2],
+        rewriter.getAffineMapArrayAttr(newIndexingMaps),
+        rewriter.getArrayAttr(newIteratorTypes), contractOp.kind());
+    rewriter.replaceOpWithNewOp<vector::BroadcastOp>(
+        contractOp, contractOp->getResultTypes()[0], newContractOp);
+    return success();
+  }
+};
+
 class CastAwayElementwiseLeadingOneDim : public RewritePattern {
 public:
   CastAwayElementwiseLeadingOneDim(MLIRContext *context)
@@ -260,10 +383,11 @@
 
 void mlir::vector::populateCastAwayVectorLeadingOneDimPatterns(
     RewritePatternSet &patterns) {
-  patterns.add<CastAwayExtractStridedSliceLeadingOneDim,
-               CastAwayInsertStridedSliceLeadingOneDim,
-               CastAwayTransferReadLeadingOneDim,
-               CastAwayTransferWriteLeadingOneDim,
-               CastAwayElementwiseLeadingOneDim>(patterns.getContext());
+  patterns
+      .add<CastAwayExtractStridedSliceLeadingOneDim,
+           CastAwayInsertStridedSliceLeadingOneDim,
+           CastAwayTransferReadLeadingOneDim,
+           CastAwayTransferWriteLeadingOneDim, CastAwayElementwiseLeadingOneDim,
+           CastAwayContractionLeadingOneDim>(patterns.getContext());
   populateShapeCastFoldingPatterns(patterns);
 }
diff --git a/mlir/test/Dialect/Vector/vector-transforms-nounroll.mlir b/mlir/test/Dialect/Vector/vector-transforms-nounroll.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Vector/vector-transforms-nounroll.mlir
@@ -0,0 +1,166 @@
+// RUN: mlir-opt %s -test-vector-to-vector-lowering -split-input-file | FileCheck %s
+
+// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// CHECK-LABEL: cast_away_contraction_leading_one_dims
+//  CHECK-NEXT:   %[[R0:.+]] =  vector.extract %{{.*}}[0] : vector<1x16x8xf32>
+//  CHECK-NEXT:   %[[R1:.+]] =  vector.extract %{{.*}}[0] : vector<1x8x16xf32>
+//  CHECK-NEXT:   %[[R2:.+]] =  vector.extract %{{.*}}[0] : vector<1x16x16xf32>
+//  CHECK-NEXT:   %[[R3:.+]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]],
+//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>}
+//  CHECK-SAME:   %[[R0]], %[[R1]], %[[R2]] : vector<16x8xf32>, vector<8x16xf32> into vector<16x16xf32>
+//  CHECK-NEXT:   %[[R4:.+]] = vector.broadcast %[[R3]] : vector<16x16xf32> to vector<1x16x16xf32>
+//  CHECK-NEXT:  return %[[R4]] : vector<1x16x16xf32>
+
+#contraction_accesses0 = [
+  affine_map<(l, i, j, k) -> (l, i, k)>,
+  affine_map<(l, i, j, k) -> (l, k, j)>,
+  affine_map<(l, i, j, k) -> (l, i, j)>
+]
+#contraction_trait0 = {
+  indexing_maps = #contraction_accesses0,
+  iterator_types = ["parallel", "parallel", "parallel", "reduction"]
+}
+
+func @cast_away_contraction_leading_one_dims(%arg0: vector<1x16x8xf32>, %arg1: vector<1x8x16xf32>, %arg2: vector<1x16x16xf32>) -> vector<1x16x16xf32> {
+  %0 = vector.contract #contraction_trait0 %arg0, %arg1, %arg2  : vector<1x16x8xf32>, vector<1x8x16xf32> into vector<1x16x16xf32>
+  return %0: vector<1x16x16xf32>
+}
+
+// -----
+// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1) -> (d1)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1) -> (d1, d0)>
+// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1) -> (d0)>
+
+// CHECK-LABEL: cast_away_contraction_leading_one_dims_transposeneeded
+//  CHECK-NEXT:   %[[R0:.+]] =  vector.extract %{{.*}}[0] : vector<1x8x16xf32>
+//  CHECK-NEXT:   %[[R1:.+]] =  vector.extract %{{.*}}[0, 0] : vector<1x1x8xf32>
+//  CHECK-NEXT:   %[[R2:.+]] =  vector.extract %{{.*}}[0, 0] : vector<1x1x16xf32>
+//  CHECK-NEXT:   %[[R3:.+]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]],
+//  CHECK-SAME:   iterator_types = ["parallel", "reduction"], kind = #vector.kind<mul>}
+//  CHECK-SAME:   %[[R1]], %[[R0]], %[[R2]] : vector<8xf32>, vector<8x16xf32> into vector<16xf32>
+//  CHECK-NEXT:   %[[R4:.+]] = vector.broadcast %[[R3]] : vector<16xf32> to vector<1x16xf32>
+//  CHECK-NEXT:   %[[R5:.+]] = vector.broadcast %[[R4]] : vector<1x16xf32> to vector<1x1x16xf32>
+//  CHECK-NEXT:  return %[[R5]] : vector<1x1x16xf32>
+
+#contraction_accesses1 = [
+  affine_map<(l, i, j, k) -> (i, l, k)>,
+  affine_map<(l, i, j, k) -> (l, k, j)>,
+  affine_map<(l, i, j, k) -> (l, i, j)>
+]
+#contraction_trait1 = {
+  indexing_maps = #contraction_accesses1,
+  iterator_types = ["parallel", "parallel", "parallel", "reduction"],
+  kind = #vector.kind<mul>
+}
+
+func @cast_away_contraction_leading_one_dims_transposeneeded(%arg0: vector<1x1x8xf32>, %arg1: vector<1x8x16xf32>, %arg2: vector<1x1x16xf32>) -> vector<1x1x16xf32> {
+  %0 = vector.contract #contraction_trait1 %arg0, %arg1, %arg2  : vector<1x1x8xf32>, vector<1x8x16xf32> into vector<1x1x16xf32>
+  return %0: vector<1x1x16xf32>
+}
+
+// -----
+// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// CHECK-LABEL: cast_away_contraction_leading_one_dims_transposeneeded2
+//  CHECK-NEXT:   %[[R0:.+]] =  vector.transpose %{{.*}}, [1, 0, 2] : vector<8x1x16xf32> to vector<1x8x16xf32>
+//  CHECK-NEXT:   %[[R1:.+]] =  vector.extract %[[R0]][0] : vector<1x8x16xf32>
+//  CHECK-NEXT:   %[[R2:.+]] =  vector.transpose %{{.*}}, [2, 0, 1] : vector<2x8x1xf32> to vector<1x2x8xf32>
+//  CHECK-NEXT:   %[[R3:.+]] =  vector.extract %[[R2]][0] : vector<1x2x8xf32>
+//  CHECK-NEXT:   %[[R4:.+]] =  vector.extract %{{.*}}[0] : vector<1x2x16xf32>
+//  CHECK-NEXT:   %[[R5:.+]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]],
+//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>}
+//  CHECK-SAME:   %[[R1]], %[[R3]], %[[R4]] : vector<8x16xf32>, vector<2x8xf32> into vector<2x16xf32>
+//  CHECK-NEXT:   %[[R6:.+]] = vector.broadcast %[[R5]] : vector<2x16xf32> to vector<1x2x16xf32>
+//  CHECK-NEXT:  return %[[R6]] : vector<1x2x16xf32>
+
+#contraction_accesses2 = [
+  affine_map<(l, i, j, k) -> (k, l, j)>,
+  affine_map<(l, i, j, k) -> (i, k, l)>,
+  affine_map<(l, i, j, k) -> (l, i, j)>
+]
+#contraction_trait2 = {
+  indexing_maps = #contraction_accesses2,
+  iterator_types = ["parallel", "parallel", "parallel", "reduction"]
+}
+
+
+func @cast_away_contraction_leading_one_dims_transposeneeded2(%arg0: vector<8x1x16xf32>, %arg1: vector<2x8x1xf32>, %arg2: vector<1x2x16xf32>) -> vector<1x2x16xf32> {
+  %0 = vector.contract #contraction_trait2 %arg0, %arg1, %arg2  : vector<8x1x16xf32>, vector<2x8x1xf32> into vector<1x2x16xf32>
+  return %0: vector<1x2x16xf32>
+}
+
+// -----
+// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+
+// CHECK-LABEL: cast_away_contraction_leading_one_dims_nonleadingunitdim_rank4
+//  CHECK-NEXT:   %[[R0:.+]] =  vector.extract %{{.*}}[0] : vector<1x8x1x16xf32>
+//  CHECK-NEXT:   %[[R1:.+]] =  vector.extract %{{.*}}[0] : vector<1x2x8x1xf32>
+//  CHECK-NEXT:   %[[R2:.+]] =  vector.transpose %[[R0]], [1, 0, 2] : vector<8x1x16xf32> to vector<1x8x16xf32>
+//  CHECK-NEXT:   %[[R3:.+]] =  vector.extract %[[R2]][0] : vector<1x8x16xf32>
+//  CHECK-NEXT:   %[[R4:.+]] =  vector.transpose %[[R1]], [2, 0, 1] : vector<2x8x1xf32> to vector<1x2x8xf32>
+//  CHECK-NEXT:   %[[R5:.+]] =  vector.extract %[[R4]][0] : vector<1x2x8xf32>
+//  CHECK-NEXT:   %[[R6:.+]] =  vector.extract %{{.*}}[0, 0] : vector<1x1x2x16xf32>
+//  CHECK-NEXT:   %[[R7:.+]] =  vector.contract {indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]],
+//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>}
+//  CHECK-SAME:   %[[R3]], %[[R5]], %[[R6]] : vector<8x16xf32>, vector<2x8xf32> into vector<2x16xf32>
+//  CHECK-NEXT:   %[[R8:.+]] =  vector.broadcast %[[R7]] : vector<2x16xf32> to vector<1x2x16xf32>
+//  CHECK-NEXT:   %[[R9:.+]] =  vector.broadcast %[[R8]] : vector<1x2x16xf32> to vector<1x1x2x16xf32>
+//  CHECK-NEXT:  return %[[R9]] : vector<1x1x2x16xf32>
+
+#contraction_accesses2 = [
+  affine_map<(m, l, i, j, k) -> (m, k, l, j)>,
+  affine_map<(m, l, i, j, k) -> (m, i, k, l)>,
+  affine_map<(m, l, i, j, k) -> (m, l, i, j)>
+]
+#contraction_trait2 = {
+  indexing_maps = #contraction_accesses2,
+  iterator_types = ["parallel","parallel", "parallel", "parallel", "reduction"]
+}
+
+
+func @cast_away_contraction_leading_one_dims_nonleadingunitdim_rank4(%arg0: vector<1x8x1x16xf32>, %arg1: vector<1x2x8x1xf32>, %arg2: vector<1x1x2x16xf32>) -> vector<1x1x2x16xf32> {
+  %0 = vector.contract #contraction_trait2 %arg0, %arg1, %arg2  : vector<1x8x1x16xf32>, vector<1x2x8x1xf32> into vector<1x1x2x16xf32>
+  return %0: vector<1x1x2x16xf32>
+}
+
+// -----
+// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// CHECK-LABEL: cast_away_contraction_leading_one_dims_nonleadingunitdim_rank4_acctranspose
+//  CHECK-NEXT:   %[[R0:.+]] =  vector.transpose %{{.*}}, [2, 0, 1, 3] : vector<1x8x1x16xf32> to vector<1x1x8x16xf32>
+//  CHECK-NEXT:   %[[R1:.+]] =  vector.transpose %{{.*}}, [3, 0, 1, 2] : vector<1x2x8x1xf32> to vector<1x1x2x8xf32>
+//  CHECK-NEXT:   %[[R2:.+]] =  vector.extract %[[R0]][0, 0] : vector<1x1x8x16xf32>
+//  CHECK-NEXT:   %[[R3:.+]] =  vector.extract %[[R1]][0, 0] : vector<1x1x2x8xf32>
+//  CHECK-NEXT:   %[[R4:.+]] =  vector.extract %{{.*}}[0, 0] : vector<1x1x2x16xf32>
+//  CHECK-NEXT:   %[[R5:.+]] =  vector.contract {indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]],
+//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>}
+//  CHECK-SAME:   %[[R2]], %[[R3]], %[[R4]] : vector<8x16xf32>, vector<2x8xf32> into vector<2x16xf32>
+//  CHECK-NEXT:   %[[R6:.+]] =  vector.broadcast %[[R5]] : vector<2x16xf32> to vector<1x2x16xf32>
+//  CHECK-NEXT:   %[[R7:.+]] =  vector.broadcast %[[R6]] : vector<1x2x16xf32> to vector<1x1x2x16xf32>
+//  CHECK-NEXT:  return %[[R7]] : vector<1x1x2x16xf32>
+
+#contraction_accesses3 = [
+  affine_map<(m, l, i, j, k) -> (m, k, l, j)>,
+  affine_map<(m, l, i, j, k) -> (m, i, k, l)>,
+  affine_map<(m, l, i, j, k) -> (l, m, i, j)>
+]
+#contraction_trait3 = {
+  indexing_maps = #contraction_accesses3,
+  iterator_types = ["parallel","parallel", "parallel", "parallel", "reduction"]
+}
+
+func @cast_away_contraction_leading_one_dims_nonleadingunitdim_rank4_acctranspose(%arg0: vector<1x8x1x16xf32>, %arg1: vector<1x2x8x1xf32>, %arg2: vector<1x1x2x16xf32>) -> vector<1x1x2x16xf32> {
+  %0 = vector.contract #contraction_trait3 %arg0, %arg1, %arg2  : vector<1x8x1x16xf32>, vector<1x2x8x1xf32> into vector<1x1x2x16xf32>
+  return %0: vector<1x1x2x16xf32>
+}
+