diff --git a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp --- a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp @@ -64,34 +64,30 @@ // TODO: Aborting when the offsets are static. There might be a way to fold // the subview op with load even if the offsets have been canonicalized // away. - if (subViewOp.getNumOffsets() == 0) - return failure(); - - ValueRange opOffsets = subViewOp.offsets(); - SmallVector opStrides; - if (subViewOp.getNumStrides()) { - // If the strides are dynamic, get the stride operands. - opStrides = llvm::to_vector<2>(subViewOp.strides()); - } else { - // When static, the stride operands can be retrieved by taking the strides - // of the result of the subview op, and dividing the strides of the base - // memref. - SmallVector staticStrides; - if (failed(subViewOp.getStaticStrides(staticStrides))) { - return failure(); + SmallVector opOffsets, opStrides; + unsigned rank = subViewOp.getType().getRank(); + opOffsets.reserve(rank); + opStrides.reserve(rank); + assert(indices.size() == rank && + "expected as many indices as rank of result"); + for (unsigned idx = 0; idx < rank; ++idx) { + if (subViewOp.isDynamicOffset(idx)) { + opOffsets.push_back(subViewOp.getDynamicOffset(idx)); + } else { + opOffsets.push_back(rewriter.create( + loc, subViewOp.getStaticOffset(idx))); } - opStrides.reserve(opOffsets.size()); - for (auto stride : staticStrides) { - auto constValAttr = rewriter.getIntegerAttr( - IndexType::get(rewriter.getContext()), stride); - opStrides.emplace_back(rewriter.create(loc, constValAttr)); + + if (subViewOp.isDynamicStride(idx)) { + opStrides.push_back(subViewOp.getDynamicStride(idx)); + } else { + opStrides.push_back(rewriter.create( + loc, subViewOp.getStaticStride(idx))); } } - assert(opOffsets.size() == opStrides.size()); // New indices for the load are the current indices * subview_stride + // subview_offset. - assert(indices.size() == opStrides.size()); sourceIndices.resize(indices.size()); for (auto index : llvm::enumerate(indices)) { auto offset = opOffsets[index.index()]; diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -23,25 +23,25 @@ // CHECK-LABEL: spv.module Logical GLSL450 gpu.module @kernels { - // CHECK-DAG: spv.globalVariable [[NUMWORKGROUPSVAR:@.*]] built_in("NumWorkgroups") : !spv.ptr, Input> - // CHECK-DAG: spv.globalVariable [[LOCALINVOCATIONIDVAR:@.*]] built_in("LocalInvocationId") : !spv.ptr, Input> - // CHECK-DAG: spv.globalVariable [[WORKGROUPIDVAR:@.*]] built_in("WorkgroupId") : !spv.ptr, Input> + // CHECK-DAG: spv.globalVariable @[[NUMWORKGROUPSVAR:.*]] built_in("NumWorkgroups") : !spv.ptr, Input> + // CHECK-DAG: spv.globalVariable @[[LOCALINVOCATIONIDVAR:.*]] built_in("LocalInvocationId") : !spv.ptr, Input> + // CHECK-DAG: spv.globalVariable @[[WORKGROUPIDVAR:.*]] built_in("WorkgroupId") : !spv.ptr, Input> // CHECK-LABEL: spv.func @load_store_kernel - // CHECK-SAME: [[ARG0:%.*]]: !spv.ptr [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 0)>} - // CHECK-SAME: [[ARG1:%.*]]: !spv.ptr [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 1)>} - // CHECK-SAME: [[ARG2:%.*]]: !spv.ptr [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 2)>} - // CHECK-SAME: [[ARG3:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 3), StorageBuffer>} - // CHECK-SAME: [[ARG4:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 4), StorageBuffer>} - // CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 5), StorageBuffer>} - // CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 6), StorageBuffer>} + // CHECK-SAME: %[[ARG0:.*]]: !spv.ptr [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 0)>} + // CHECK-SAME: %[[ARG1:.*]]: !spv.ptr [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 1)>} + // CHECK-SAME: %[[ARG2:.*]]: !spv.ptr [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 2)>} + // CHECK-SAME: %[[ARG3:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 3), StorageBuffer>} + // CHECK-SAME: %[[ARG4:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 4), StorageBuffer>} + // CHECK-SAME: %[[ARG5:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 5), StorageBuffer>} + // CHECK-SAME: %[[ARG6:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 6), StorageBuffer>} gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) kernel attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { - // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]] - // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]] - // CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}} - // CHECK: [[ADDRESSLOCALINVOCATIONID:%.*]] = spv._address_of [[LOCALINVOCATIONIDVAR]] - // CHECK: [[LOCALINVOCATIONID:%.*]] = spv.Load "Input" [[ADDRESSLOCALINVOCATIONID]] - // CHECK: [[LOCALINVOCATIONIDX:%.*]] = spv.CompositeExtract [[LOCALINVOCATIONID]]{{\[}}0 : i32{{\]}} + // CHECK: %[[ADDRESSWORKGROUPID:.*]] = spv._address_of @[[WORKGROUPIDVAR]] + // CHECK: %[[WORKGROUPID:.*]] = spv.Load "Input" %[[ADDRESSWORKGROUPID]] + // CHECK: %[[WORKGROUPIDX:.*]] = spv.CompositeExtract %[[WORKGROUPID]]{{\[}}0 : i32{{\]}} + // CHECK: %[[ADDRESSLOCALINVOCATIONID:.*]] = spv._address_of @[[LOCALINVOCATIONIDVAR]] + // CHECK: %[[LOCALINVOCATIONID:.*]] = spv.Load "Input" %[[ADDRESSLOCALINVOCATIONID]] + // CHECK: %[[LOCALINVOCATIONIDX:.*]] = spv.CompositeExtract %[[LOCALINVOCATIONID]]{{\[}}0 : i32{{\]}} %0 = "gpu.block_id"() {dimension = "x"} : () -> index %1 = "gpu.block_id"() {dimension = "y"} : () -> index %2 = "gpu.block_id"() {dimension = "z"} : () -> index @@ -54,26 +54,28 @@ %9 = "gpu.block_dim"() {dimension = "x"} : () -> index %10 = "gpu.block_dim"() {dimension = "y"} : () -> index %11 = "gpu.block_dim"() {dimension = "z"} : () -> index - // CHECK: [[INDEX1:%.*]] = spv.IAdd [[ARG3]], [[WORKGROUPIDX]] + // CHECK: %[[INDEX1:.*]] = spv.IAdd %[[ARG3]], %[[WORKGROUPIDX]] %12 = addi %arg3, %0 : index - // CHECK: [[INDEX2:%.*]] = spv.IAdd [[ARG4]], [[LOCALINVOCATIONIDX]] + // CHECK: %[[INDEX2:.*]] = spv.IAdd %[[ARG4]], %[[LOCALINVOCATIONIDX]] %13 = addi %arg4, %3 : index - // CHECK: [[STRIDE1_1:%.*]] = spv.constant 4 : i32 - // CHECK: [[OFFSET1_1:%.*]] = spv.IMul [[STRIDE1_1]], [[INDEX1]] : i32 - // CHECK: [[STRIDE1_2:%.*]] = spv.constant 1 : i32 - // CHECK: [[UPDATE1_2:%.*]] = spv.IMul [[STRIDE1_2]], [[INDEX2]] : i32 - // CHECK: [[OFFSET1_2:%.*]] = spv.IAdd [[OFFSET1_1]], [[UPDATE1_2]] : i32 - // CHECK: [[ZERO1:%.*]] = spv.constant 0 : i32 - // CHECK: [[PTR1:%.*]] = spv.AccessChain [[ARG0]]{{\[}}[[ZERO1]], [[OFFSET1_2]]{{\]}} - // CHECK-NEXT: [[VAL1:%.*]] = spv.Load "StorageBuffer" [[PTR1]] + // CHECK: %[[ZERO1:.*]] = spv.constant 0 : i32 + // CHECK: %[[OFFSET1_0:.*]] = spv.constant 0 : i32 + // CHECK: %[[STRIDE1_1:.*]] = spv.constant 4 : i32 + // CHECK: %[[UPDATE1_1:.*]] = spv.IMul %[[STRIDE1_1]], %[[INDEX1]] : i32 + // CHECK: %[[OFFSET1_1:.*]] = spv.IAdd %[[OFFSET1_0]], %[[UPDATE1_1]] : i32 + // CHECK: %[[STRIDE1_2:.*]] = spv.constant 1 : i32 + // CHECK: %[[UPDATE1_2:.*]] = spv.IMul %[[STRIDE1_2]], %[[INDEX2]] : i32 + // CHECK: %[[OFFSET1_2:.*]] = spv.IAdd %[[OFFSET1_1]], %[[UPDATE1_2]] : i32 + // CHECK: %[[PTR1:.*]] = spv.AccessChain %[[ARG0]]{{\[}}%[[ZERO1]], %[[OFFSET1_2]]{{\]}} + // CHECK-NEXT: %[[VAL1:.*]] = spv.Load "StorageBuffer" %[[PTR1]] %14 = load %arg0[%12, %13] : memref<12x4xf32> - // CHECK: [[PTR2:%.*]] = spv.AccessChain [[ARG1]]{{\[}}{{%.*}}, {{%.*}}{{\]}} - // CHECK-NEXT: [[VAL2:%.*]] = spv.Load "StorageBuffer" [[PTR2]] + // CHECK: %[[PTR2:.*]] = spv.AccessChain %[[ARG1]]{{\[}}{{%.*}}, {{%.*}}{{\]}} + // CHECK-NEXT: %[[VAL2:.*]] = spv.Load "StorageBuffer" %[[PTR2]] %15 = load %arg1[%12, %13] : memref<12x4xf32> - // CHECK: [[VAL3:%.*]] = spv.FAdd [[VAL1]], [[VAL2]] + // CHECK: %[[VAL3:.*]] = spv.FAdd %[[VAL1]], %[[VAL2]] %16 = addf %14, %15 : f32 - // CHECK: [[PTR3:%.*]] = spv.AccessChain [[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}} - // CHECK-NEXT: spv.Store "StorageBuffer" [[PTR3]], [[VAL3]] + // CHECK: %[[PTR3:.*]] = spv.AccessChain %[[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}} + // CHECK-NEXT: spv.Store "StorageBuffer" %[[PTR3]], %[[VAL3]] store %16, %arg2[%12, %13] : memref<12x4xf32> gpu.return } diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir --- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir @@ -16,27 +16,33 @@ gpu.module @kernels { gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>) kernel attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { - // CHECK: [[LB:%.*]] = spv.constant 4 : i32 + // CHECK: %[[LB:.*]] = spv.constant 4 : i32 %lb = constant 4 : index - // CHECK: [[UB:%.*]] = spv.constant 42 : i32 + // CHECK: %[[UB:.*]] = spv.constant 42 : i32 %ub = constant 42 : index - // CHECK: [[STEP:%.*]] = spv.constant 2 : i32 + // CHECK: %[[STEP:.*]] = spv.constant 2 : i32 %step = constant 2 : index // CHECK: spv.loop { - // CHECK-NEXT: spv.Branch [[HEADER:\^.*]]([[LB]] : i32) - // CHECK: [[HEADER]]([[INDVAR:%.*]]: i32): - // CHECK: [[CMP:%.*]] = spv.SLessThan [[INDVAR]], [[UB]] : i32 - // CHECK: spv.BranchConditional [[CMP]], [[BODY:\^.*]], [[MERGE:\^.*]] - // CHECK: [[BODY]]: - // CHECK: [[STRIDE1:%.*]] = spv.constant 1 : i32 - // CHECK: [[OFFSET1:%.*]] = spv.IMul [[STRIDE1]], [[INDVAR]] : i32 - // CHECK: spv.AccessChain {{%.*}}{{\[}}{{%.*}}, [[OFFSET1]]{{\]}} : {{.*}} - // CHECK: [[STRIDE2:%.*]] = spv.constant 1 : i32 - // CHECK: [[OFFSET2:%.*]] = spv.IMul [[STRIDE2]], [[INDVAR]] : i32 - // CHECK: spv.AccessChain {{%.*}}{{\[}}{{%.*}}, [[OFFSET2]]{{\]}} : {{.*}} - // CHECK: [[INCREMENT:%.*]] = spv.IAdd [[INDVAR]], [[STEP]] : i32 - // CHECK: spv.Branch [[HEADER]]([[INCREMENT]] : i32) - // CHECK: [[MERGE]] + // CHECK-NEXT: spv.Branch ^[[HEADER:.*]](%[[LB]] : i32) + // CHECK: ^[[HEADER]](%[[INDVAR:.*]]: i32): + // CHECK: %[[CMP:.*]] = spv.SLessThan %[[INDVAR]], %[[UB]] : i32 + // CHECK: spv.BranchConditional %[[CMP]], ^[[BODY:.*]], ^[[MERGE:.*]] + // CHECK: ^[[BODY]]: + // CHECK: %[[ZERO1:.*]] = spv.constant 0 : i32 + // CHECK: %[[OFFSET0:.*]] = spv.constant 0 : i32 + // CHECK: %[[STRIDE1:.*]] = spv.constant 1 : i32 + // CHECK: %[[OFFSET1:.*]] = spv.IMul %[[STRIDE1]], %[[INDVAR]] : i32 + // CHECK: %[[INDEX1:.*]] = spv.IAdd %[[OFFSET0]], %[[OFFSET1]] : i32 + // CHECK: spv.AccessChain {{%.*}}{{\[}}%[[ZERO1]], %[[INDEX1]]{{\]}} + // CHECK: %[[ZERO2:.*]] = spv.constant 0 : i32 + // CHECK: %[[OFFSET2:.*]] = spv.constant 0 : i32 + // CHECK: %[[STRIDE2:.*]] = spv.constant 1 : i32 + // CHECK: %[[OFFSET3:.*]] = spv.IMul %[[STRIDE2]], %[[INDVAR]] : i32 + // CHECK: %[[INDEX2:.*]] = spv.IAdd %[[OFFSET2]], %[[OFFSET3]] : i32 + // CHECK: spv.AccessChain {{%.*}}[%[[ZERO2]], %[[INDEX2]]] + // CHECK: %[[INCREMENT:.*]] = spv.IAdd %[[INDVAR]], %[[STEP]] : i32 + // CHECK: spv.Branch ^[[HEADER]](%[[INCREMENT]] : i32) + // CHECK: ^[[MERGE]] // CHECK: spv._merge // CHECK: } scf.for %arg4 = %lb to %ub step %step { diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -725,12 +725,14 @@ // CHECK-LABEL: @load_i16 // CHECK: (%[[ARG0:.+]]: {{.*}}, %[[ARG1:.+]]: i32) func @load_i16(%arg0: memref<10xi16>, %index : index) { + // CHECK: %[[ZERO1:.+]] = spv.constant 0 : i32 + // CHECK: %[[ZERO2:.+]] = spv.constant 0 : i32 // CHECK: %[[ONE:.+]] = spv.constant 1 : i32 - // CHECK: %[[FLAT_IDX:.+]] = spv.IMul %[[ONE]], %[[ARG1]] : i32 - // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32 + // CHECK: %[[UPDATE:.+]] = spv.IMul %[[ONE]], %[[ARG1]] : i32 + // CHECK: %[[FLAT_IDX:.+]] = spv.IAdd %[[ZERO2]], %[[UPDATE]] : i32 // CHECK: %[[TWO1:.+]] = spv.constant 2 : i32 // CHECK: %[[QUOTIENT:.+]] = spv.SDiv %[[FLAT_IDX]], %[[TWO1]] : i32 - // CHECK: %[[PTR:.+]] = spv.AccessChain %{{.+}}[%[[ZERO]], %[[QUOTIENT]]] + // CHECK: %[[PTR:.+]] = spv.AccessChain %{{.+}}[%[[ZERO1]], %[[QUOTIENT]]] // CHECK: %[[LOAD:.+]] = spv.Load "StorageBuffer" %[[PTR]] // CHECK: %[[TWO2:.+]] = spv.constant 2 : i32 // CHECK: %[[SIXTEEN:.+]] = spv.constant 16 : i32 @@ -786,9 +788,11 @@ // CHECK-LABEL: @store_i16 // CHECK: (%[[ARG0:.+]]: {{.*}}, %[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32) func @store_i16(%arg0: memref<10xi16>, %index: index, %value: i16) { + // CHECK: %[[ZERO1:.+]] = spv.constant 0 : i32 + // CHECK: %[[ZERO2:.+]] = spv.constant 0 : i32 // CHECK: %[[ONE:.+]] = spv.constant 1 : i32 - // CHECK: %[[FLAT_IDX:.+]] = spv.IMul %[[ONE]], %[[ARG1]] : i32 - // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32 + // CHECK: %[[UPDATE:.+]] = spv.IMul %[[ONE]], %[[ARG1]] : i32 + // CHECK: %[[FLAT_IDX:.+]] = spv.IAdd %[[ZERO2]], %[[UPDATE]] : i32 // CHECK: %[[TWO:.+]] = spv.constant 2 : i32 // CHECK: %[[SIXTEEN:.+]] = spv.constant 16 : i32 // CHECK: %[[IDX:.+]] = spv.SMod %[[FLAT_IDX]], %[[TWO]] : i32 @@ -800,7 +804,7 @@ // CHECK: %[[STORE_VAL:.+]] = spv.ShiftLeftLogical %[[CLAMPED_VAL]], %[[OFFSET]] : i32, i32 // CHECK: %[[TWO2:.+]] = spv.constant 2 : i32 // CHECK: %[[ACCESS_IDX:.+]] = spv.SDiv %[[FLAT_IDX]], %[[TWO2]] : i32 - // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO]], %[[ACCESS_IDX]]] + // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO1]], %[[ACCESS_IDX]]] // CHECK: spv.AtomicAnd "Device" "AcquireRelease" %[[PTR]], %[[MASK]] // CHECK: spv.AtomicOr "Device" "AcquireRelease" %[[PTR]], %[[STORE_VAL]] store %value, %arg0[%index] : memref<10xi16> diff --git a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir --- a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir @@ -15,23 +15,23 @@ func @fold_static_stride_subview (%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) { - // CHECK: %[[C2:.*]] = constant 2 - // CHECK: %[[C3:.*]] = constant 3 - // CHECK: %[[T0:.*]] = muli %[[ARG3]], %[[C2]] - // CHECK: %[[T1:.*]] = addi %[[ARG1]], %[[T0]] - // CHECK: %[[T2:.*]] = muli %[[ARG4]], %[[C3]] - // CHECK: %[[T3:.*]] = addi %[[ARG2]], %[[T2]] - // CHECK: %[[LOADVAL:.*]] = load %[[ARG0]][%[[T1]], %[[T3]]] - // CHECK: %[[STOREVAL:.*]] = sqrt %[[LOADVAL]] - // CHECK: %[[T6:.*]] = muli %[[ARG3]], %[[C2]] - // CHECK: %[[T7:.*]] = addi %[[ARG1]], %[[T6]] - // CHECK: %[[T8:.*]] = muli %[[ARG4]], %[[C3]] - // CHECK: %[[T9:.*]] = addi %[[ARG2]], %[[T8]] - // CHECK store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]] - %0 = subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]> - %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]> + // CHECK-DAG: %[[C2:.*]] = constant 2 + // CHECK-DAG: %[[C3:.*]] = constant 3 + // CHECK: %[[T0:.*]] = muli %[[ARG3]], %[[C3]] + // CHECK: %[[T1:.*]] = addi %[[ARG1]], %[[T0]] + // CHECK: %[[T2:.*]] = muli %[[ARG4]], %[[ARG2]] + // CHECK: %[[T3:.*]] = addi %[[T2]], %[[C2]] + // CHECK: %[[LOADVAL:.*]] = load %[[ARG0]][%[[T1]], %[[T3]]] + // CHECK: %[[STOREVAL:.*]] = sqrt %[[LOADVAL]] + // CHECK: %[[T6:.*]] = muli %[[ARG3]], %[[C3]] + // CHECK: %[[T7:.*]] = addi %[[ARG1]], %[[T6]] + // CHECK: %[[T8:.*]] = muli %[[ARG4]], %[[ARG2]] + // CHECK: %[[T9:.*]] = addi %[[T8]], %[[C2]] + // CHECK: store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]] + %0 = subview %arg0[%arg1, 2][4, 4][3, %arg2] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [96, ?]> + %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]> %2 = sqrt %1 : f32 - store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]> + store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]> return }