diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -2685,6 +2685,21 @@
     /// constructed with `b` at location `loc`.
     SmallVector<Range, 8> getOrCreateRanges(OpBuilder &b, Location loc);
 
+    /// Return the offsets as Values. Each Value is either the dynamic
+    /// value specified in the op or a ConstantIndexOp constructed
+    /// with `b` at location `loc`.
+    SmallVector<Value, 4> getOrCreateOffsets(OpBuilder &b, Location loc);
+
+    /// Return the sizes as Values. Each Value is either the dynamic
+    /// value specified in the op or a ConstantIndexOp constructed
+    /// with `b` at location `loc`.
+    SmallVector<Value, 4> getOrCreateSizes(OpBuilder &b, Location loc);
+
+    /// Return the strides as Values. Each Value is either the dynamic
+    /// value specified in the op or a ConstantIndexOp constructed with
+    /// `b` at location `loc`.
+    SmallVector<Value, 4> getOrCreateStrides(OpBuilder &b, Location loc);
+
     /// A subview result type can be fully inferred from the source type and the
     /// static representation of offsets, sizes and strides. Special sentinels
     /// encode the dynamic case.
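Note (illustration, not part of the patch): the new getOrCreateOffsets/getOrCreateSizes/getOrCreateStrides helpers mirror the existing getOrCreateRanges and hand back one Value per dimension, whether the corresponding entry is a static attribute or a dynamic operand. A minimal sketch of how a rewrite pattern might consume them, assuming `rewriter`, `loc`, `subViewOp`, and the load's `indices` are in scope (all names here are illustrative):

    // Fold load indices into indices on the source memref, per dimension:
    //   sourceIndex[d] = offset[d] + index[d] * stride[d]
    SmallVector<Value, 4> offsets = subViewOp.getOrCreateOffsets(rewriter, loc);
    SmallVector<Value, 4> strides = subViewOp.getOrCreateStrides(rewriter, loc);
    SmallVector<Value, 4> sourceIndices;
    for (auto dim : llvm::zip(offsets, strides, indices)) {
      Value scaled =
          rewriter.create<MulIOp>(loc, std::get<1>(dim), std::get<2>(dim));
      sourceIndices.push_back(
          rewriter.create<AddIOp>(loc, std::get<0>(dim), scaled));
    }

This is essentially what the LegalizeStandardForSPIRV change below now relies on, instead of rebuilding stride constants by hand.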
diff --git a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp
--- a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp
+++ b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp
@@ -64,34 +64,15 @@
   // TODO: Aborting when the offsets are static. There might be a way to fold
   // the subview op with load even if the offsets have been canonicalized
   // away.
-  if (subViewOp.getNumOffsets() == 0)
-    return failure();
-
-  ValueRange opOffsets = subViewOp.offsets();
-  SmallVector<Value, 2> opStrides;
-  if (subViewOp.getNumStrides()) {
-    // If the strides are dynamic, get the stride operands.
-    opStrides = llvm::to_vector<2>(subViewOp.strides());
-  } else {
-    // When static, the stride operands can be retrieved by taking the strides
-    // of the result of the subview op, and dividing the strides of the base
-    // memref.
-    SmallVector<int64_t, 2> staticStrides;
-    if (failed(subViewOp.getStaticStrides(staticStrides))) {
-      return failure();
-    }
-    opStrides.reserve(opOffsets.size());
-    for (auto stride : staticStrides) {
-      auto constValAttr = rewriter.getIntegerAttr(
-          IndexType::get(rewriter.getContext()), stride);
-      opStrides.emplace_back(rewriter.create<ConstantOp>(loc, constValAttr));
-    }
-  }
-  assert(opOffsets.size() == opStrides.size());
+  SmallVector<Value, 4> opOffsets = subViewOp.getOrCreateOffsets(rewriter, loc);
+  SmallVector<Value, 4> opStrides = subViewOp.getOrCreateStrides(rewriter, loc);
+  assert(opOffsets.size() == indices.size() &&
+         "expected as many indices as rank of subview op result type");
+  assert(opStrides.size() == indices.size() &&
+         "expected as many indices as rank of subview op result type");
 
   // New indices for the load are the current indices * subview_stride +
   // subview_offset.
-  assert(indices.size() == opStrides.size());
   sourceIndices.resize(indices.size());
   for (auto index : llvm::enumerate(indices)) {
     auto offset = opOffsets[index.index()];
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -2548,6 +2548,44 @@
   return res;
 }
 
+SmallVector<Value, 4> SubViewOp::getOrCreateOffsets(OpBuilder &b,
+                                                    Location loc) {
+  unsigned dynamicIdx = 1;
+  return llvm::to_vector<4>(llvm::map_range(
+      static_offsets().cast<ArrayAttr>(), [&](Attribute a) -> Value {
+        int64_t staticOffset = a.cast<IntegerAttr>().getInt();
+        if (ShapedType::isDynamicStrideOrOffset(staticOffset))
+          return getOperand(dynamicIdx++);
+        else
+          return b.create<ConstantIndexOp>(loc, staticOffset);
+      }));
+}
+
+SmallVector<Value, 4> SubViewOp::getOrCreateSizes(OpBuilder &b, Location loc) {
+  unsigned dynamicIdx = 1 + offsets().size();
+  return llvm::to_vector<4>(llvm::map_range(
+      static_sizes().cast<ArrayAttr>(), [&](Attribute a) -> Value {
+        int64_t staticSize = a.cast<IntegerAttr>().getInt();
+        if (ShapedType::isDynamic(staticSize))
+          return getOperand(dynamicIdx++);
+        else
+          return b.create<ConstantIndexOp>(loc, staticSize);
+      }));
+}
+
+SmallVector<Value, 4> SubViewOp::getOrCreateStrides(OpBuilder &b,
+                                                    Location loc) {
+  unsigned dynamicIdx = 1 + offsets().size() + sizes().size();
+  return llvm::to_vector<4>(llvm::map_range(
+      static_strides().cast<ArrayAttr>(), [&](Attribute a) -> Value {
+        int64_t staticStride = a.cast<IntegerAttr>().getInt();
+        if (ShapedType::isDynamicStrideOrOffset(staticStride))
+          return getOperand(dynamicIdx++);
+        else
+          return b.create<ConstantIndexOp>(loc, staticStride);
+      }));
+}
+
 LogicalResult
 SubViewOp::getStaticStrides(SmallVectorImpl<int64_t> &staticStrides) {
   if (!strides().empty())
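Note (illustration, not part of the patch): the dynamicIdx starting points in the three helpers above (1, then 1 + offsets().size(), then 1 + offsets().size() + sizes().size()) encode SubViewOp's operand order: operand 0 is the source memref, followed by the dynamic offset, size, and stride operands, while fully static entries live only in the static_offsets/static_sizes/static_strides attributes. A small sketch of that bookkeeping, as a hypothetical helper shown only to make the indexing explicit:

    // First operand index of each dynamic operand group of a SubViewOp,
    // matching the dynamicIdx initializers used in the helpers above.
    // group: 0 = offsets, 1 = sizes, 2 = strides.
    static unsigned firstDynamicOperandIdx(SubViewOp op, unsigned group) {
      unsigned idx = 1; // operand 0 is the source memref
      if (group > 0)
        idx += op.offsets().size(); // skip dynamic offset operands
      if (group > 1)
        idx += op.sizes().size();   // skip dynamic size operands
      return idx;
    }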
diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -23,25 +23,25 @@
   // CHECK-LABEL: spv.module Logical GLSL450
   gpu.module @kernels {
-    // CHECK-DAG: spv.globalVariable [[NUMWORKGROUPSVAR:@.*]] built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
-    // CHECK-DAG: spv.globalVariable [[LOCALINVOCATIONIDVAR:@.*]] built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
-    // CHECK-DAG: spv.globalVariable [[WORKGROUPIDVAR:@.*]] built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
+    // CHECK-DAG: spv.globalVariable @[[NUMWORKGROUPSVAR:.*]] built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
+    // CHECK-DAG: spv.globalVariable @[[LOCALINVOCATIONIDVAR:.*]] built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
+    // CHECK-DAG: spv.globalVariable @[[WORKGROUPIDVAR:.*]] built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
     // CHECK-LABEL: spv.func @load_store_kernel
-    // CHECK-SAME: [[ARG0:%.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 0)>}
-    // CHECK-SAME: [[ARG1:%.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 1)>}
-    // CHECK-SAME: [[ARG2:%.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 2)>}
-    // CHECK-SAME: [[ARG3:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 3), StorageBuffer>}
-    // CHECK-SAME: [[ARG4:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 4), StorageBuffer>}
-    // CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 5), StorageBuffer>}
-    // CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 6), StorageBuffer>}
+    // CHECK-SAME: %[[ARG0:.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 0)>}
+    // CHECK-SAME: %[[ARG1:.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 1)>}
+    // CHECK-SAME: %[[ARG2:.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 2)>}
+    // CHECK-SAME: %[[ARG3:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 3), StorageBuffer>}
+    // CHECK-SAME: %[[ARG4:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 4), StorageBuffer>}
+    // CHECK-SAME: %[[ARG5:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 5), StorageBuffer>}
+    // CHECK-SAME: %[[ARG6:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 6), StorageBuffer>}
     gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>,
                                 %arg3: index, %arg4: index, %arg5: index, %arg6: index) kernel
       attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
-      // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]]
-      // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]]
-      // CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}}
-      // CHECK: [[ADDRESSLOCALINVOCATIONID:%.*]] = spv._address_of [[LOCALINVOCATIONIDVAR]]
-      // CHECK: [[LOCALINVOCATIONID:%.*]] = spv.Load "Input" [[ADDRESSLOCALINVOCATIONID]]
-      // CHECK: [[LOCALINVOCATIONIDX:%.*]] = spv.CompositeExtract [[LOCALINVOCATIONID]]{{\[}}0 : i32{{\]}}
+      // CHECK: %[[ADDRESSWORKGROUPID:.*]] = spv._address_of @[[WORKGROUPIDVAR]]
+      // CHECK: %[[WORKGROUPID:.*]] = spv.Load "Input" %[[ADDRESSWORKGROUPID]]
+      // CHECK: %[[WORKGROUPIDX:.*]] = spv.CompositeExtract %[[WORKGROUPID]]{{\[}}0 : i32{{\]}}
+      // CHECK: %[[ADDRESSLOCALINVOCATIONID:.*]] = spv._address_of @[[LOCALINVOCATIONIDVAR]]
+      // CHECK: %[[LOCALINVOCATIONID:.*]] = spv.Load "Input" %[[ADDRESSLOCALINVOCATIONID]]
+      // CHECK: %[[LOCALINVOCATIONIDX:.*]] = spv.CompositeExtract %[[LOCALINVOCATIONID]]{{\[}}0 : i32{{\]}}
       %0 = "gpu.block_id"() {dimension = "x"} : () -> index
       %1 = "gpu.block_id"() {dimension = "y"} : () -> index
       %2 = "gpu.block_id"() {dimension = "z"} : () -> index
@@ -54,26 +54,26 @@
       %9 = "gpu.block_dim"() {dimension = "x"} : () -> index
       %10 = "gpu.block_dim"() {dimension = "y"} : () -> index
       %11 = "gpu.block_dim"() {dimension = "z"} : () -> index
-      // CHECK: [[INDEX1:%.*]] = spv.IAdd [[ARG3]], [[WORKGROUPIDX]]
+      // CHECK: %[[INDEX1:.*]] = spv.IAdd %[[ARG3]], %[[WORKGROUPIDX]]
       %12 = addi %arg3, %0 : index
-      // CHECK: [[INDEX2:%.*]] = spv.IAdd [[ARG4]], [[LOCALINVOCATIONIDX]]
+      // CHECK: %[[INDEX2:.*]] = spv.IAdd %[[ARG4]], %[[LOCALINVOCATIONIDX]]
       %13 = addi %arg4, %3 : index
-      // CHECK: [[STRIDE1_1:%.*]] = spv.constant 4 : i32
-      // CHECK: [[OFFSET1_1:%.*]] = spv.IMul [[STRIDE1_1]], [[INDEX1]] : i32
-      // CHECK: [[STRIDE1_2:%.*]] = spv.constant 1 : i32
-      // CHECK: [[UPDATE1_2:%.*]] = spv.IMul [[STRIDE1_2]], [[INDEX2]] : i32
-      // CHECK: [[OFFSET1_2:%.*]] = spv.IAdd [[OFFSET1_1]], [[UPDATE1_2]] : i32
-      // CHECK: [[ZERO1:%.*]] = spv.constant 0 : i32
-      // CHECK: [[PTR1:%.*]] = spv.AccessChain [[ARG0]]{{\[}}[[ZERO1]], [[OFFSET1_2]]{{\]}}
-      // CHECK-NEXT: [[VAL1:%.*]] = spv.Load "StorageBuffer" [[PTR1]]
+      // CHECK: %[[STRIDE1_1:.*]] = spv.constant 4 : i32
+      // CHECK: %[[OFFSET1_1:.*]] = spv.IMul %[[STRIDE1_1]], %[[INDEX1]] : i32
+      // CHECK: %[[STRIDE1_2:.*]] = spv.constant 1 : i32
+      // CHECK: %[[UPDATE1_2:.*]] = spv.IMul %[[STRIDE1_2]], %[[INDEX2]] : i32
+      // CHECK: %[[OFFSET1_2:.*]] = spv.IAdd %[[OFFSET1_1]], %[[UPDATE1_2]] : i32
+      // CHECK: %[[ZERO1:.*]] = spv.constant 0 : i32
+      // CHECK: %[[PTR1:.*]] = spv.AccessChain %[[ARG0]]{{\[}}%[[ZERO1]], %[[OFFSET1_2]]{{\]}}
+      // CHECK-NEXT: %[[VAL1:.*]] = spv.Load "StorageBuffer" %[[PTR1]]
       %14 = load %arg0[%12, %13] : memref<12x4xf32>
-      // CHECK: [[PTR2:%.*]] = spv.AccessChain [[ARG1]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
-      // CHECK-NEXT: [[VAL2:%.*]] = spv.Load "StorageBuffer" [[PTR2]]
+      // CHECK: %[[PTR2:.*]] = spv.AccessChain %[[ARG1]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
+      // CHECK-NEXT: %[[VAL2:.*]] = spv.Load "StorageBuffer" %[[PTR2]]
       %15 = load %arg1[%12, %13] : memref<12x4xf32>
-      // CHECK: [[VAL3:%.*]] = spv.FAdd [[VAL1]], [[VAL2]]
+      // CHECK: %[[VAL3:.*]] = spv.FAdd %[[VAL1]], %[[VAL2]]
       %16 = addf %14, %15 : f32
-      // CHECK: [[PTR3:%.*]] = spv.AccessChain [[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
-      // CHECK-NEXT: spv.Store "StorageBuffer" [[PTR3]], [[VAL3]]
+      // CHECK: %[[PTR3:.*]] = spv.AccessChain %[[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
+      // CHECK-NEXT: spv.Store "StorageBuffer" %[[PTR3]], %[[VAL3]]
       store %16, %arg2[%12, %13] : memref<12x4xf32>
       gpu.return
     }
diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -16,27 +16,29 @@
   gpu.module @kernels {
     gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>) kernel
       attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
-      // CHECK: [[LB:%.*]] = spv.constant 4 : i32
+      // CHECK: %[[LB:.*]] = spv.constant 4 : i32
       %lb = constant 4 : index
-      // CHECK: [[UB:%.*]] = spv.constant 42 : i32
+      // CHECK: %[[UB:.*]] = spv.constant 42 : i32
       %ub = constant 42 : index
-      // CHECK: [[STEP:%.*]] = spv.constant 2 : i32
+      // CHECK: %[[STEP:.*]] = spv.constant 2 : i32
       %step = constant 2 : index
       // CHECK: spv.loop {
-      // CHECK-NEXT: spv.Branch [[HEADER:\^.*]]([[LB]] : i32)
-      // CHECK: [[HEADER]]([[INDVAR:%.*]]: i32):
-      // CHECK: [[CMP:%.*]] = spv.SLessThan [[INDVAR]], [[UB]] : i32
-      // CHECK: spv.BranchConditional [[CMP]], [[BODY:\^.*]], [[MERGE:\^.*]]
-      // CHECK: [[BODY]]:
-      // CHECK: [[STRIDE1:%.*]] = spv.constant 1 : i32
-      // CHECK: [[OFFSET1:%.*]] = spv.IMul [[STRIDE1]], [[INDVAR]] : i32
-      // CHECK: spv.AccessChain {{%.*}}{{\[}}{{%.*}}, [[OFFSET1]]{{\]}} : {{.*}}
-      // CHECK: [[STRIDE2:%.*]] = spv.constant 1 : i32
-      // CHECK: [[OFFSET2:%.*]] = spv.IMul [[STRIDE2]], [[INDVAR]] : i32
-      // CHECK: spv.AccessChain {{%.*}}{{\[}}{{%.*}}, [[OFFSET2]]{{\]}} : {{.*}}
-      // CHECK: [[INCREMENT:%.*]] = spv.IAdd [[INDVAR]], [[STEP]] : i32
-      // CHECK: spv.Branch [[HEADER]]([[INCREMENT]] : i32)
-      // CHECK: [[MERGE]]
+      // CHECK-NEXT: spv.Branch ^[[HEADER:.*]](%[[LB]] : i32)
+      // CHECK: ^[[HEADER]](%[[INDVAR:.*]]: i32):
+      // CHECK: %[[CMP:.*]] = spv.SLessThan %[[INDVAR]], %[[UB]] : i32
+      // CHECK: spv.BranchConditional %[[CMP]], ^[[BODY:.*]], ^[[MERGE:.*]]
+      // CHECK: ^[[BODY]]:
+      // CHECK: %[[STRIDE1:.*]] = spv.constant 1 : i32
+      // CHECK: %[[INDEX1:.*]] = spv.IMul %[[STRIDE1]], %[[INDVAR]] : i32
+      // CHECK: %[[ZERO1:.*]] = spv.constant 0 : i32
+      // CHECK: spv.AccessChain {{%.*}}{{\[}}%[[ZERO1]], %[[INDEX1]]{{\]}}
+      // CHECK: %[[STRIDE2:.*]] = spv.constant 1 : i32
+      // CHECK: %[[INDEX2:.*]] = spv.IMul %[[STRIDE2]], %[[INDVAR]] : i32
+      // CHECK: %[[ZERO2:.*]] = spv.constant 0 : i32
+      // CHECK: spv.AccessChain {{%.*}}[%[[ZERO2]], %[[INDEX2]]]
+      // CHECK: %[[INCREMENT:.*]] = spv.IAdd %[[INDVAR]], %[[STEP]] : i32
+      // CHECK: spv.Branch ^[[HEADER]](%[[INCREMENT]] : i32)
+      // CHECK: ^[[MERGE]]
      // CHECK: spv._merge
      // CHECK: }
      scf.for %arg4 = %lb to %ub step %step {
diff --git a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir
--- a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir
+++ b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir
@@ -15,23 +15,23 @@
 func @fold_static_stride_subview
  (%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index,
   %arg4 : index) {
-  // CHECK: %[[C2:.*]] = constant 2
-  // CHECK: %[[C3:.*]] = constant 3
-  // CHECK: %[[T0:.*]] = muli %[[ARG3]], %[[C2]]
-  // CHECK: %[[T1:.*]] = addi %[[ARG1]], %[[T0]]
-  // CHECK: %[[T2:.*]] = muli %[[ARG4]], %[[C3]]
-  // CHECK: %[[T3:.*]] = addi %[[ARG2]], %[[T2]]
-  // CHECK: %[[LOADVAL:.*]] = load %[[ARG0]][%[[T1]], %[[T3]]]
-  // CHECK: %[[STOREVAL:.*]] = sqrt %[[LOADVAL]]
-  // CHECK: %[[T6:.*]] = muli %[[ARG3]], %[[C2]]
-  // CHECK: %[[T7:.*]] = addi %[[ARG1]], %[[T6]]
-  // CHECK: %[[T8:.*]] = muli %[[ARG4]], %[[C3]]
-  // CHECK: %[[T9:.*]] = addi %[[ARG2]], %[[T8]]
-  // CHECK store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]]
-  %0 = subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]>
-  %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]>
+  // CHECK-DAG: %[[C2:.*]] = constant 2
+  // CHECK-DAG: %[[C3:.*]] = constant 3
+  // CHECK: %[[T0:.*]] = muli %[[ARG3]], %[[C3]]
+  // CHECK: %[[T1:.*]] = addi %[[ARG1]], %[[T0]]
+  // CHECK: %[[T2:.*]] = muli %[[ARG4]], %[[ARG2]]
+  // CHECK: %[[T3:.*]] = addi %[[T2]], %[[C2]]
+  // CHECK: %[[LOADVAL:.*]] = load %[[ARG0]][%[[T1]], %[[T3]]]
+  // CHECK: %[[STOREVAL:.*]] = sqrt %[[LOADVAL]]
+  // CHECK: %[[T6:.*]] = muli %[[ARG3]], %[[C3]]
+  // CHECK: %[[T7:.*]] = addi %[[ARG1]], %[[T6]]
+  // CHECK: %[[T8:.*]] = muli %[[ARG4]], %[[ARG2]]
+  // CHECK: %[[T9:.*]] = addi %[[T8]], %[[C2]]
+  // CHECK: store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]]
+  %0 = subview %arg0[%arg1, 2][4, 4][3, %arg2] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [96, ?]>
+  %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]>
   %2 = sqrt %1 : f32
-  store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]>
+  store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]>
   return
 }
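Note (illustration, not part of the patch): the new CHECK lines in subview-to-spirv.mlir follow directly from the folding rule quoted in the legalization pass (source index = offset + index * stride), applied per dimension of the rewritten op %0 = subview %arg0[%arg1, 2][4, 4][3, %arg2]. Dimension 0 has a dynamic offset %arg1 and static stride 3, giving T0 = %arg3 * %c3 and T1 = %arg1 + T0; dimension 1 has a static offset 2 and dynamic stride %arg2, giving T2 = %arg4 * %arg2 and T3 = T2 + 2 (and likewise T6..T9 for the store). The rule itself, written out as a trivial sketch rather than library code:

    // Per-dimension index folding checked by the updated test.
    int64_t foldSubViewIndex(int64_t offset, int64_t index, int64_t stride) {
      return offset + index * stride;
    }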