diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -2685,6 +2685,21 @@
     /// constructed with `b` at location `loc`.
     SmallVector<Range, 8> getOrCreateRanges(OpBuilder &b, Location loc);
 
+    /// Return the offsets as Values. Each Value is either the dynamic
+    /// value specified in the op or a ConstantIndexOp constructed
+    /// with `b` at location `loc`.
+    SmallVector<Value, 4> getOrCreateOffsets(OpBuilder &b, Location loc);
+
+    /// Return the sizes as Values. Each Value is either the dynamic
+    /// value specified in the op or a ConstantIndexOp constructed
+    /// with `b` at location `loc`.
+    SmallVector<Value, 4> getOrCreateSizes(OpBuilder &b, Location loc);
+
+    /// Return the strides as Values. Each Value is either the dynamic
+    /// value specified in the op or a ConstantIndexOp constructed with
+    /// `b` at location `loc`.
+    SmallVector<Value, 4> getOrCreateStrides(OpBuilder &b, Location loc);
+
     /// A subview result type can be fully inferred from the source type and the
     /// static representation of offsets, sizes and strides. Special sentinels
     /// encode the dynamic case.
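Note (illustration, not part of the patch): the new getOrCreateOffsets/getOrCreateSizes/getOrCreateStrides helpers mirror the existing getOrCreateRanges and hand back one Value per dimension, whether the corresponding entry is a static attribute or a dynamic operand. A minimal sketch of how a rewrite pattern might consume them, assuming `rewriter`, `loc`, `subViewOp`, and the load's `indices` are in scope (all names here are illustrative):

    // Fold load indices into indices on the source memref, per dimension:
    //   sourceIndex[d] = offset[d] + index[d] * stride[d]
    SmallVector<Value, 4> offsets = subViewOp.getOrCreateOffsets(rewriter, loc);
    SmallVector<Value, 4> strides = subViewOp.getOrCreateStrides(rewriter, loc);
    SmallVector<Value, 4> sourceIndices;
    for (auto dim : llvm::zip(offsets, strides, indices)) {
      Value scaled =
          rewriter.create<MulIOp>(loc, std::get<1>(dim), std::get<2>(dim));
      sourceIndices.push_back(
          rewriter.create<AddIOp>(loc, std::get<0>(dim), scaled));
    }

This is essentially what the LegalizeStandardForSPIRV change below now relies on, instead of rebuilding stride constants by hand.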
diff --git a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp
--- a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp
+++ b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp
@@ -64,34 +64,15 @@
   // TODO: Aborting when the offsets are static. There might be a way to fold
   // the subview op with load even if the offsets have been canonicalized
   // away.
-  if (subViewOp.getNumOffsets() == 0)
-    return failure();
-
-  ValueRange opOffsets = subViewOp.offsets();
-  SmallVector<Value, 2> opStrides;
-  if (subViewOp.getNumStrides()) {
-    // If the strides are dynamic, get the stride operands.
-    opStrides = llvm::to_vector<2>(subViewOp.strides());
-  } else {
-    // When static, the stride operands can be retrieved by taking the strides
-    // of the result of the subview op, and dividing the strides of the base
-    // memref.
-    SmallVector<int64_t, 2> staticStrides;
-    if (failed(subViewOp.getStaticStrides(staticStrides))) {
-      return failure();
-    }
-    opStrides.reserve(opOffsets.size());
-    for (auto stride : staticStrides) {
-      auto constValAttr = rewriter.getIntegerAttr(
-          IndexType::get(rewriter.getContext()), stride);
-      opStrides.emplace_back(rewriter.create<ConstantOp>(loc, constValAttr));
-    }
-  }
-  assert(opOffsets.size() == opStrides.size());
+  SmallVector<Value, 4> opOffsets = subViewOp.getOrCreateOffsets(rewriter, loc);
+  SmallVector<Value, 4> opStrides = subViewOp.getOrCreateStrides(rewriter, loc);
+  assert(opOffsets.size() == indices.size() &&
+         "expected as many indices as rank of subview op result type");
+  assert(opStrides.size() == indices.size() &&
+         "expected as many indices as rank of subview op result type");
 
   // New indices for the load are the current indices * subview_stride +
   // subview_offset.
-  assert(indices.size() == opStrides.size());
   sourceIndices.resize(indices.size());
   for (auto index : llvm::enumerate(indices)) {
     auto offset = opOffsets[index.index()];
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -2548,6 +2548,44 @@
   return res;
 }
 
+SmallVector<Value, 4> SubViewOp::getOrCreateOffsets(OpBuilder &b,
+                                                    Location loc) {
+  unsigned dynamicIdx = 1;
+  return llvm::to_vector<4>(llvm::map_range(
+      static_offsets().cast<ArrayAttr>(), [&](Attribute a) -> Value {
+        int64_t staticOffset = a.cast<IntegerAttr>().getInt();
+        if (ShapedType::isDynamicStrideOrOffset(staticOffset))
+          return getOperand(dynamicIdx++);
+        else
+          return b.create<ConstantIndexOp>(loc, staticOffset);
+      }));
+}
+
+SmallVector<Value, 4> SubViewOp::getOrCreateSizes(OpBuilder &b, Location loc) {
+  unsigned dynamicIdx = 1 + offsets().size();
+  return llvm::to_vector<4>(llvm::map_range(
+      static_sizes().cast<ArrayAttr>(), [&](Attribute a) -> Value {
+        int64_t staticSize = a.cast<IntegerAttr>().getInt();
+        if (ShapedType::isDynamic(staticSize))
+          return getOperand(dynamicIdx++);
+        else
+          return b.create<ConstantIndexOp>(loc, staticSize);
+      }));
+}
+
+SmallVector<Value, 4> SubViewOp::getOrCreateStrides(OpBuilder &b,
+                                                    Location loc) {
+  unsigned dynamicIdx = 1 + offsets().size() + sizes().size();
+  return llvm::to_vector<4>(llvm::map_range(
+      static_strides().cast<ArrayAttr>(), [&](Attribute a) -> Value {
+        int64_t staticStride = a.cast<IntegerAttr>().getInt();
+        if (ShapedType::isDynamicStrideOrOffset(staticStride))
+          return getOperand(dynamicIdx++);
+        else
+          return b.create<ConstantIndexOp>(loc, staticStride);
+      }));
+}
+
 LogicalResult
 SubViewOp::getStaticStrides(SmallVectorImpl<int64_t> &staticStrides) {
   if (!strides().empty())
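Note (illustration, not part of the patch): the dynamicIdx starting points in the three helpers above (1, then 1 + offsets().size(), then 1 + offsets().size() + sizes().size()) encode SubViewOp's operand order: operand 0 is the source memref, followed by the dynamic offset, size, and stride operands, while fully static entries live only in the static_offsets/static_sizes/static_strides attributes. A small sketch of that bookkeeping, as a hypothetical helper shown only to make the indexing explicit:

    // First operand index of each dynamic operand group of a SubViewOp,
    // matching the dynamicIdx initializers used in the helpers above.
    // group: 0 = offsets, 1 = sizes, 2 = strides.
    static unsigned firstDynamicOperandIdx(SubViewOp op, unsigned group) {
      unsigned idx = 1; // operand 0 is the source memref
      if (group > 0)
        idx += op.offsets().size(); // skip dynamic offset operands
      if (group > 1)
        idx += op.sizes().size();   // skip dynamic size operands
      return idx;
    }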
diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -23,25 +23,25 @@
   // CHECK-LABEL: spv.module Logical GLSL450
   gpu.module @kernels {
-    // CHECK-DAG: spv.globalVariable [[NUMWORKGROUPSVAR:@.*]] built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
-    // CHECK-DAG: spv.globalVariable [[LOCALINVOCATIONIDVAR:@.*]] built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
-    // CHECK-DAG: spv.globalVariable [[WORKGROUPIDVAR:@.*]] built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
+    // CHECK-DAG: spv.globalVariable @[[NUMWORKGROUPSVAR:.*]] built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
+    // CHECK-DAG: spv.globalVariable @[[LOCALINVOCATIONIDVAR:.*]] built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
+    // CHECK-DAG: spv.globalVariable @[[WORKGROUPIDVAR:.*]] built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
     // CHECK-LABEL: spv.func @load_store_kernel
-    // CHECK-SAME: [[ARG0:%.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 0)>}
-    // CHECK-SAME: [[ARG1:%.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 1)>}
-    // CHECK-SAME: [[ARG2:%.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 2)>}
-    // CHECK-SAME: [[ARG3:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 3), StorageBuffer>}
-    // CHECK-SAME: [[ARG4:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 4), StorageBuffer>}
-    // CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 5), StorageBuffer>}
-    // CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 6), StorageBuffer>}
+    // CHECK-SAME: %[[ARG0:.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 0)>}
+    // CHECK-SAME: %[[ARG1:.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 1)>}
+    // CHECK-SAME: %[[ARG2:.*]]: !spv.ptr<!spv.struct<!spv.array<48 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 2)>}
+    // CHECK-SAME: %[[ARG3:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 3), StorageBuffer>}
+    // CHECK-SAME: %[[ARG4:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 4), StorageBuffer>}
+    // CHECK-SAME: %[[ARG5:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 5), StorageBuffer>}
+    // CHECK-SAME: %[[ARG6:.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 6), StorageBuffer>}
     gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>,
                                 %arg3: index, %arg4: index, %arg5: index, %arg6: index) kernel
       attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
-      // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]]
-      // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]]
-      // CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}}
-      // CHECK: [[ADDRESSLOCALINVOCATIONID:%.*]] = spv._address_of [[LOCALINVOCATIONIDVAR]]
-      // CHECK: [[LOCALINVOCATIONID:%.*]] = spv.Load "Input" [[ADDRESSLOCALINVOCATIONID]]
-      // CHECK: [[LOCALINVOCATIONIDX:%.*]] = spv.CompositeExtract [[LOCALINVOCATIONID]]{{\[}}0 : i32{{\]}}
+      // CHECK: %[[ADDRESSWORKGROUPID:.*]] = spv._address_of @[[WORKGROUPIDVAR]]
+      // CHECK: %[[WORKGROUPID:.*]] = spv.Load "Input" %[[ADDRESSWORKGROUPID]]
+      // CHECK: %[[WORKGROUPIDX:.*]] = spv.CompositeExtract %[[WORKGROUPID]]{{\[}}0 : i32{{\]}}
+      // CHECK: %[[ADDRESSLOCALINVOCATIONID:.*]] = spv._address_of @[[LOCALINVOCATIONIDVAR]]
+      // CHECK: %[[LOCALINVOCATIONID:.*]] = spv.Load "Input" %[[ADDRESSLOCALINVOCATIONID]]
+      // CHECK: %[[LOCALINVOCATIONIDX:.*]] = spv.CompositeExtract %[[LOCALINVOCATIONID]]{{\[}}0 : i32{{\]}}
       %0 = "gpu.block_id"() {dimension = "x"} : () -> index
       %1 = "gpu.block_id"() {dimension = "y"} : () -> index
       %2 = "gpu.block_id"() {dimension = "z"} : () -> index
@@ -54,26 +54,26 @@
       %9 = "gpu.block_dim"() {dimension = "x"} : () -> index
       %10 = "gpu.block_dim"() {dimension = "y"} : () -> index
       %11 = "gpu.block_dim"() {dimension = "z"} : () -> index
-      // CHECK: [[INDEX1:%.*]] = spv.IAdd [[ARG3]], [[WORKGROUPIDX]]
+      // CHECK: %[[INDEX1:.*]] = spv.IAdd %[[ARG3]], %[[WORKGROUPIDX]]
       %12 = addi %arg3, %0 : index
-      // CHECK: [[INDEX2:%.*]] = spv.IAdd [[ARG4]], [[LOCALINVOCATIONIDX]]
+      // CHECK: %[[INDEX2:.*]] = spv.IAdd %[[ARG4]], %[[LOCALINVOCATIONIDX]]
       %13 = addi %arg4, %3 : index
-      // CHECK: [[STRIDE1_1:%.*]] = spv.constant 4 : i32
-      // CHECK: [[OFFSET1_1:%.*]] = spv.IMul [[STRIDE1_1]], [[INDEX1]] : i32
-      // CHECK: [[STRIDE1_2:%.*]] = spv.constant 1 : i32
-      // CHECK: [[UPDATE1_2:%.*]] = spv.IMul [[STRIDE1_2]], [[INDEX2]] : i32
-      // CHECK: [[OFFSET1_2:%.*]] = spv.IAdd [[OFFSET1_1]], [[UPDATE1_2]] : i32
-      // CHECK: [[ZERO1:%.*]] = spv.constant 0 : i32
-      // CHECK: [[PTR1:%.*]] = spv.AccessChain [[ARG0]]{{\[}}[[ZERO1]], [[OFFSET1_2]]{{\]}}
-      // CHECK-NEXT: [[VAL1:%.*]] = spv.Load "StorageBuffer" [[PTR1]]
+      // CHECK: %[[STRIDE1_1:.*]] = spv.constant 4 : i32
+      // CHECK: %[[OFFSET1_1:.*]] = spv.IMul %[[STRIDE1_1]], %[[INDEX1]] : i32
+      // CHECK: %[[STRIDE1_2:.*]] = spv.constant 1 : i32
+      // CHECK: %[[UPDATE1_2:.*]] = spv.IMul %[[STRIDE1_2]], %[[INDEX2]] : i32
+      // CHECK: %[[OFFSET1_2:.*]] = spv.IAdd %[[OFFSET1_1]], %[[UPDATE1_2]] : i32
+      // CHECK: %[[ZERO1:.*]] = spv.constant 0 : i32
+      // CHECK: %[[PTR1:.*]] = spv.AccessChain %[[ARG0]]{{\[}}%[[ZERO1]], %[[OFFSET1_2]]{{\]}}
+      // CHECK-NEXT: %[[VAL1:.*]] = spv.Load "StorageBuffer" %[[PTR1]]
       %14 = load %arg0[%12, %13] : memref<12x4xf32>
-      // CHECK: [[PTR2:%.*]] = spv.AccessChain [[ARG1]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
-      // CHECK-NEXT: [[VAL2:%.*]] = spv.Load "StorageBuffer" [[PTR2]]
+      // CHECK: %[[PTR2:.*]] = spv.AccessChain %[[ARG1]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
+      // CHECK-NEXT: %[[VAL2:.*]] = spv.Load "StorageBuffer" %[[PTR2]]
       %15 = load %arg1[%12, %13] : memref<12x4xf32>
-      // CHECK: [[VAL3:%.*]] = spv.FAdd [[VAL1]], [[VAL2]]
+      // CHECK: %[[VAL3:.*]] = spv.FAdd %[[VAL1]], %[[VAL2]]
       %16 = addf %14, %15 : f32
-      // CHECK: [[PTR3:%.*]] = spv.AccessChain [[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
-      // CHECK-NEXT: spv.Store "StorageBuffer" [[PTR3]], [[VAL3]]
+      // CHECK: %[[PTR3:.*]] = spv.AccessChain %[[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
+      // CHECK-NEXT: spv.Store "StorageBuffer" %[[PTR3]], %[[VAL3]]
       store %16, %arg2[%12, %13] : memref<12x4xf32>
       gpu.return
     }
diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -16,27 +16,29 @@
   gpu.module @kernels {
     gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>) kernel
       attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
-      // CHECK: [[LB:%.*]] = spv.constant 4 : i32
+      // CHECK: %[[LB:.*]] = spv.constant 4 : i32
       %lb = constant 4 : index
-      // CHECK: [[UB:%.*]] = spv.constant 42 : i32
+      // CHECK: %[[UB:.*]] = spv.constant 42 : i32
       %ub = constant 42 : index
-      // CHECK: [[STEP:%.*]] = spv.constant 2 : i32
+      // CHECK: %[[STEP:.*]] = spv.constant 2 : i32
       %step = constant 2 : index
       // CHECK: spv.loop {
-      // CHECK-NEXT: spv.Branch [[HEADER:\^.*]]([[LB]] : i32)
-      // CHECK: [[HEADER]]([[INDVAR:%.*]]: i32):
-      // CHECK: [[CMP:%.*]] = spv.SLessThan [[INDVAR]], [[UB]] : i32
-      // CHECK: spv.BranchConditional [[CMP]], [[BODY:\^.*]], [[MERGE:\^.*]]
-      // CHECK: [[BODY]]:
-      // CHECK: [[STRIDE1:%.*]] = spv.constant 1 : i32
-      // CHECK: [[OFFSET1:%.*]] = spv.IMul [[STRIDE1]], [[INDVAR]] : i32
-      // CHECK: spv.AccessChain {{%.*}}{{\[}}{{%.*}}, [[OFFSET1]]{{\]}} : {{.*}}
-      // CHECK: [[STRIDE2:%.*]] = spv.constant 1 : i32
-      // CHECK: [[OFFSET2:%.*]] = spv.IMul [[STRIDE2]], [[INDVAR]] : i32
-      // CHECK: spv.AccessChain {{%.*}}{{\[}}{{%.*}}, [[OFFSET2]]{{\]}} : {{.*}}
-      // CHECK: [[INCREMENT:%.*]] = spv.IAdd [[INDVAR]], [[STEP]] : i32
-      // CHECK: spv.Branch [[HEADER]]([[INCREMENT]] : i32)
-      // CHECK: [[MERGE]]
+      // CHECK-NEXT: spv.Branch ^[[HEADER:.*]](%[[LB]] : i32)
+      // CHECK: ^[[HEADER]](%[[INDVAR:.*]]: i32):
+      // CHECK: %[[CMP:.*]] = spv.SLessThan %[[INDVAR]], %[[UB]] : i32
+      // CHECK: spv.BranchConditional %[[CMP]], ^[[BODY:.*]], ^[[MERGE:.*]]
+      // CHECK: ^[[BODY]]:
+      // CHECK: %[[STRIDE1:.*]] = spv.constant 1 : i32
+      // CHECK: %[[INDEX1:.*]] = spv.IMul %[[STRIDE1]], %[[INDVAR]] : i32
+      // CHECK: %[[ZERO1:.*]] = spv.constant 0 : i32
+      // CHECK: spv.AccessChain {{%.*}}{{\[}}%[[ZERO1]], %[[INDEX1]]{{\]}}
+      // CHECK: %[[STRIDE2:.*]] = spv.constant 1 : i32
+      // CHECK: %[[INDEX2:.*]] = spv.IMul %[[STRIDE2]], %[[INDVAR]] : i32
+      // CHECK: %[[ZERO2:.*]] = spv.constant 0 : i32
+      // CHECK: spv.AccessChain {{%.*}}[%[[ZERO2]], %[[INDEX2]]]
+      // CHECK: %[[INCREMENT:.*]] = spv.IAdd %[[INDVAR]], %[[STEP]] : i32
+      // CHECK: spv.Branch ^[[HEADER]](%[[INCREMENT]] : i32)
+      // CHECK: ^[[MERGE]]
      // CHECK: spv._merge
      // CHECK: }
      scf.for %arg4 = %lb to %ub step %step {
diff --git a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir
--- a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir
+++ b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir
@@ -15,23 +15,23 @@
 func @fold_static_stride_subview
  (%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index,
   %arg4 : index) {
-  // CHECK: %[[C2:.*]] = constant 2
-  // CHECK: %[[C3:.*]] = constant 3
-  // CHECK: %[[T0:.*]] = muli %[[ARG3]], %[[C2]]
-  // CHECK: %[[T1:.*]] = addi %[[ARG1]], %[[T0]]
-  // CHECK: %[[T2:.*]] = muli %[[ARG4]], %[[C3]]
-  // CHECK: %[[T3:.*]] = addi %[[ARG2]], %[[T2]]
-  // CHECK: %[[LOADVAL:.*]] = load %[[ARG0]][%[[T1]], %[[T3]]]
-  // CHECK: %[[STOREVAL:.*]] = sqrt %[[LOADVAL]]
-  // CHECK: %[[T6:.*]] = muli %[[ARG3]], %[[C2]]
-  // CHECK: %[[T7:.*]] = addi %[[ARG1]], %[[T6]]
-  // CHECK: %[[T8:.*]] = muli %[[ARG4]], %[[C3]]
-  // CHECK: %[[T9:.*]] = addi %[[ARG2]], %[[T8]]
-  // CHECK store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]]
-  %0 = subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]>
-  %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]>
+  // CHECK-DAG: %[[C2:.*]] = constant 2
+  // CHECK-DAG: %[[C3:.*]] = constant 3
+  // CHECK: %[[T0:.*]] = muli %[[ARG3]], %[[C3]]
+  // CHECK: %[[T1:.*]] = addi %[[ARG1]], %[[T0]]
+  // CHECK: %[[T2:.*]] = muli %[[ARG4]], %[[ARG2]]
+  // CHECK: %[[T3:.*]] = addi %[[T2]], %[[C2]]
+  // CHECK: %[[LOADVAL:.*]] = load %[[ARG0]][%[[T1]], %[[T3]]]
+  // CHECK: %[[STOREVAL:.*]] = sqrt %[[LOADVAL]]
+  // CHECK: %[[T6:.*]] = muli %[[ARG3]], %[[C3]]
+  // CHECK: %[[T7:.*]] = addi %[[ARG1]], %[[T6]]
+  // CHECK: %[[T8:.*]] = muli %[[ARG4]], %[[ARG2]]
+  // CHECK: %[[T9:.*]] = addi %[[T8]], %[[C2]]
+  // CHECK: store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]]
+  %0 = subview %arg0[%arg1, 2][4, 4][3, %arg2] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [96, ?]>
+  %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]>
   %2 = sqrt %1 : f32
-  store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]>
+  store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]>
   return
 }
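Note (illustration, not part of the patch): the new CHECK lines in subview-to-spirv.mlir follow directly from the folding rule quoted in the legalization pass (source index = offset + index * stride), applied per dimension of the rewritten op %0 = subview %arg0[%arg1, 2][4, 4][3, %arg2]. Dimension 0 has a dynamic offset %arg1 and static stride 3, giving T0 = %arg3 * %c3 and T1 = %arg1 + T0; dimension 1 has a static offset 2 and dynamic stride %arg2, giving T2 = %arg4 * %arg2 and T3 = T2 + 2 (and likewise T6..T9 for the store). The rule itself, written out as a trivial sketch rather than library code:

    // Per-dimension index folding checked by the updated test.
    int64_t foldSubViewIndex(int64_t offset, int64_t index, int64_t stride) {
      return offset + index * stride;
    }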