diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
--- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
@@ -4,6 +4,7 @@
 // RUN: FileCheck %s

 #map0 = affine_map<(d0, d1) -> (d1, d0)>
+#map1 = affine_map<(d0, d1) -> (d1)>

 func @print_memref_f32(memref<*xf32>)

@@ -29,6 +30,7 @@
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %c2 = constant 2 : index
+  %c3 = constant 3 : index
   %c6 = constant 6 : index
   %cst = constant -4.2e+01 : f32
   %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref<?x?xf32>
@@ -76,6 +78,28 @@
   // CHECK-SAME: ( 205, 305, 405, 505, 504 ),
   // CHECK-SAME: ( 105, 205, 305, 405, 505 ) )

+  %3 = vector.transfer_read %0[%c2, %c3], %cst : memref<?x?xf32>, vector<5x5xf32>
+  vector.print %3 : vector<5x5xf32>
+  // New 5x5 block rooted @{2, 3} in memory.
+  // CHECK-NEXT: ( ( 403, 503, 502, -42, -42 ),
+  // CHECK-SAME: ( 404, 504, 503, -42, -42 ),
+  // CHECK-SAME: ( 405, 505, 504, -42, -42 ),
+  // CHECK-SAME: ( 305, 405, 505, -42, -42 ),
+  // CHECK-SAME: ( -42, -42, -42, -42, -42 ) )
+
+  %4 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map0} : memref<?x?xf32>, vector<5x5xf32>
+  vector.print %4 : vector<5x5xf32>
+  // Transposed 5x5 block rooted @{2, 3} in memory.
+  // CHECK-NEXT: ( ( 403, 404, 405, 305, -42 ),
+  // CHECK-SAME: ( 503, 504, 505, 405, -42 ),
+  // CHECK-SAME: ( 502, 503, 504, 505, -42 ),
+  // CHECK-SAME: ( -42, -42, -42, -42, -42 ),
+  // CHECK-SAME: ( -42, -42, -42, -42, -42 ) )
+
+  %5 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map1} : memref<?x?xf32>, vector<5xf32>
+  vector.print %5 : vector<5xf32>
+  // CHECK-NEXT: ( 403, 503, 502, -42, -42 )
+
   dealloc %0 : memref<?x?xf32>
   return
 }
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -1096,7 +1096,7 @@
                             SmallVectorImpl<int64_t> &strides) {
   int64_t offset;
   auto successStrides = getStridesAndOffset(memRefType, strides, offset);
-  bool isContiguous = (strides.back() == 1);
+  bool isContiguous = strides.empty() || strides.back() == 1;
   if (isContiguous) {
     auto sizes = memRefType.getShape();
     for (int index = 0, e = strides.size() - 2; index < e; ++index) {
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -111,15 +111,6 @@
   template <typename Lambda>
   void emitLoops(Lambda loopBodyBuilder);

-  /// Operate within the body of `emitLoops` to:
-  /// 1. Compute the indexings `majorIvs + majorOffsets` and save them in
-  ///    `majorIvsPlusOffsets`.
-  /// 2. Return a boolean that determines whether the first `majorIvs.rank()`
-  ///    dimensions `majorIvs + majorOffsets` are all within `memrefBounds`.
-  Value emitInBoundsCondition(ValueRange majorIvs, ValueRange majorOffsets,
-                              MemRefBoundsCapture &memrefBounds,
-                              SmallVectorImpl<Value> &majorIvsPlusOffsets);
-
   /// Common state to lower vector transfer ops.
   PatternRewriter &rewriter;
   const VectorTransferToSCFOptions &options;
@@ -196,11 +187,16 @@
   return slt(v, ub);
 }

-template <typename ConcreteOp>
-Value NDTransferOpHelper<ConcreteOp>::emitInBoundsCondition(
-    ValueRange majorIvs, ValueRange majorOffsets,
-    MemRefBoundsCapture &memrefBounds,
-    SmallVectorImpl<Value> &majorIvsPlusOffsets) {
+/// 1. Compute the indexings `majorIvs + majorOffsets` and save them in
+///    `majorIvsPlusOffsets`.
+/// 2. Return a value of i1 that determines whether the first `majorIvs.rank()`
+///    dimensions `majorIvs + majorOffsets` are all within `memrefBounds`.
+static Value
+emitInBoundsCondition(PatternRewriter &rewriter,
+                      VectorTransferOpInterface xferOp, unsigned leadingRank,
+                      ValueRange majorIvs, ValueRange majorOffsets,
+                      MemRefBoundsCapture &memrefBounds,
+                      SmallVectorImpl<Value> &majorIvsPlusOffsets) {
   Value inBoundsCondition;
   majorIvsPlusOffsets.reserve(majorIvs.size());
   unsigned idx = 0;
@@ -271,7 +267,8 @@
   // context.
   SmallVector<Value, 8> majorIvsPlusOffsets;
   Value inBoundsCondition = emitInBoundsCondition(
-      majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
+      rewriter, cast<VectorTransferOpInterface>(xferOp.getOperation()),
+      leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);

   if (inBoundsCondition) {
     // 2. If the condition is not null, we need an IfOp, which may yield
@@ -374,7 +371,8 @@
   // context.
   SmallVector<Value, 8> majorIvsPlusOffsets;
   Value inBoundsCondition = emitInBoundsCondition(
-      majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
+      rewriter, cast<VectorTransferOpInterface>(xferOp.getOperation()),
+      leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);

   if (inBoundsCondition) {
     // 2.a. If the condition is not null, we need an IfOp, to write
@@ -424,60 +422,6 @@
   return coalescedIdx;
 }

-/// Emits remote memory accesses that are clipped to the boundaries of the
-/// MemRef.
-template <typename TransferOpTy>
-static SmallVector<Value, 8>
-clip(TransferOpTy transfer, MemRefBoundsCapture &bounds, ArrayRef<Value> ivs) {
-  using namespace mlir::edsc;
-
-  Value zero(std_constant_index(0)), one(std_constant_index(1));
-  SmallVector<Value, 8> memRefAccess(transfer.indices());
-  SmallVector<Value, 8> clippedScalarAccessExprs(memRefAccess.size());
-  // Indices accessing to remote memory are clipped and their expressions are
-  // returned in clippedScalarAccessExprs.
-  for (unsigned memRefDim = 0; memRefDim < clippedScalarAccessExprs.size();
-       ++memRefDim) {
-    // Linear search on a small number of entries.
-    int loopIndex = -1;
-    auto exprs = transfer.permutation_map().getResults();
-    for (auto en : llvm::enumerate(exprs)) {
-      auto expr = en.value();
-      auto dim = expr.template dyn_cast<AffineDimExpr>();
-      // Sanity check.
-      assert(
-          (dim || expr.template cast<AffineConstantExpr>().getValue() == 0) &&
-          "Expected dim or 0 in permutationMap");
-      if (dim && memRefDim == dim.getPosition()) {
-        loopIndex = en.index();
-        break;
-      }
-    }
-
-    // We cannot distinguish atm between unrolled dimensions that implement
-    // the "always full" tile abstraction and need clipping from the other
-    // ones. So we conservatively clip everything.
-    using namespace edsc::op;
-    auto N = bounds.ub(memRefDim);
-    auto i = memRefAccess[memRefDim];
-    if (loopIndex < 0) {
-      auto N_minus_1 = N - one;
-      auto select_1 = std_select(slt(i, N), i, N_minus_1);
-      clippedScalarAccessExprs[memRefDim] =
-          std_select(slt(i, zero), zero, select_1);
-    } else {
-      auto ii = ivs[loopIndex];
-      auto i_plus_ii = i + ii;
-      auto N_minus_1 = N - one;
-      auto select_1 = std_select(slt(i_plus_ii, N), i_plus_ii, N_minus_1);
-      clippedScalarAccessExprs[memRefDim] =
-          std_select(slt(i_plus_ii, zero), zero, select_1);
-    }
-  }
-
-  return clippedScalarAccessExprs;
-}
-
 namespace mlir {

 template <typename TransferOpTy>
@@ -497,6 +441,60 @@
                          {}, 0);
 }

+static void emitWithBoundsChecks(
+    PatternRewriter &rewriter, VectorTransferOpInterface transfer,
+    ValueRange ivs, MemRefBoundsCapture &memRefBoundsCapture,
+    function_ref<void(ArrayRef<Value>)> inBoundsFun,
+    function_ref<void(ArrayRef<Value>)> outOfBoundsFun = nullptr) {
+  // Permute the incoming indices according to the permutation map.
+  SmallVector<Value, 8> indices =
+      linalg::applyMapToValues(rewriter, transfer.getLoc(),
+                               transfer.permutation_map(), transfer.indices());
+
+  // Generate a bounds check if necessary.
+  SmallVector<Value, 8> majorIvsPlusOffsets;
+  Value inBoundsCondition =
+      emitInBoundsCondition(rewriter, transfer, 0, ivs, indices,
+                            memRefBoundsCapture, majorIvsPlusOffsets);
+
+  // Apply the permutation map to the ivs. The permutation map may not use all
+  // the inputs.
+  SmallVector<Value, 8> scalarAccessExprs(transfer.indices().size());
+  for (unsigned memRefDim = 0; memRefDim < transfer.indices().size();
+       ++memRefDim) {
+    // Linear search on a small number of entries.
+    int loopIndex = -1;
+    auto exprs = transfer.permutation_map().getResults();
+    for (auto en : llvm::enumerate(exprs)) {
+      auto expr = en.value();
+      auto dim = expr.dyn_cast<AffineDimExpr>();
+      // Sanity check.
+      assert((dim || expr.cast<AffineConstantExpr>().getValue() == 0) &&
+             "Expected dim or 0 in permutationMap");
+      if (dim && memRefDim == dim.getPosition()) {
+        loopIndex = en.index();
+        break;
+      }
+    }
+
+    using namespace edsc::op;
+    auto i = transfer.indices()[memRefDim];
+    scalarAccessExprs[memRefDim] = loopIndex < 0 ? i : i + ivs[loopIndex];
+  }
+
+  if (inBoundsCondition)
+    conditionBuilder(
+        /* scf.if */ inBoundsCondition, // {
+        [&] { inBoundsFun(scalarAccessExprs); },
+        // } else {
+        outOfBoundsFun ? [&] { outOfBoundsFun(scalarAccessExprs); }
+                       : function_ref<void()>()
+        // }
+    );
+  else
+    inBoundsFun(scalarAccessExprs);
+}
+
 /// Lowers TransferReadOp into a combination of:
 /// 1. local memory allocation;
 /// 2. perfect loop nest over:
@@ -588,17 +586,25 @@
   Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer);
   StdIndexedValue local(tmp);
   loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
-    auto ivs = llvm::to_vector<8>(loopIvs);
+    auto ivsStorage = llvm::to_vector<8>(loopIvs);
     // Swap the ivs which will reorder memory accesses.
     if (coalescedIdx >= 0)
-      std::swap(ivs.back(), ivs[coalescedIdx]);
-    // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
-    SmallVector<Value, 8> indices = clip(transfer, memRefBoundsCapture, ivs);
-    ArrayRef<Value> indicesRef(indices), ivsRef(ivs);
-    Value pos = std_index_cast(IntegerType::get(32, ctx), ivsRef.back());
-    Value scal = remote(indicesRef);
-    Value vector = vector_insert_element(scal, local(ivsRef.drop_back()), pos);
-    local(ivsRef.drop_back()) = vector;
+      std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]);
+
+    ArrayRef<Value> ivs(ivsStorage);
+    Value pos = std_index_cast(IntegerType::get(32, ctx), ivs.back());
+    Value inVector = local(ivs.drop_back());
+    auto loadValue = [&](ArrayRef<Value> indices) {
+      Value vector = vector_insert_element(remote(indices), inVector, pos);
+      local(ivs.drop_back()) = vector;
+    };
+    auto loadPadding = [&](ArrayRef<Value>) {
+      Value vector = vector_insert_element(transfer.padding(), inVector, pos);
+      local(ivs.drop_back()) = vector;
+    };
+    emitWithBoundsChecks(
+        rewriter, cast<VectorTransferOpInterface>(transfer.getOperation()), ivs,
+        memRefBoundsCapture, loadValue, loadPadding);
   });

   Value vectorValue = std_load(vector_type_cast(tmp));
@@ -674,17 +680,21 @@
   Value vec = vector_type_cast(tmp);
   std_store(vectorValue, vec);
   loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
-    auto ivs = llvm::to_vector<8>(loopIvs);
-    // Swap the ivs which will reorder memory accesses.
+    auto ivsStorage = llvm::to_vector<8>(loopIvs);
+    // Swap the ivsStorage which will reorder memory accesses.
     if (coalescedIdx >= 0)
-      std::swap(ivs.back(), ivs[coalescedIdx]);
-    // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
-    SmallVector<Value, 8> indices = clip(transfer, memRefBoundsCapture, ivs);
-    ArrayRef<Value> indicesRef(indices), ivsRef(ivs);
+      std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]);
+
+    ArrayRef<Value> ivs(ivsStorage);
     Value pos =
-        std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back());
-    Value scalar = vector_extract_element(local(ivsRef.drop_back()), pos);
-    remote(indices) = scalar;
+        std_index_cast(IntegerType::get(32, op->getContext()), ivs.back());
+    auto storeValue = [&](ArrayRef<Value> indices) {
+      Value scalar = vector_extract_element(local(ivs.drop_back()), pos);
+      remote(indices) = scalar;
+    };
+    emitWithBoundsChecks(
+        rewriter, cast<VectorTransferOpInterface>(transfer.getOperation()), ivs,
+        memRefBoundsCapture, storeValue);
   });

   // 3. Erase.
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
--- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
@@ -15,11 +15,13 @@
       %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1)
      %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
       // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds.
-      // CHECK: {{.*}} = select
-      // CHECK: %[[FILTERED1:.*]] = select
-      // CHECK: {{.*}} = select
-      // CHECK: %[[FILTERED2:.*]] = select
-      // CHECK: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32>
+      // CHECK: scf.if
+      // CHECK-NEXT: load
+      // CHECK-NEXT: vector.insertelement
+      // CHECK-NEXT: store
+      // CHECK-NEXT: else
+      // CHECK-NEXT: vector.insertelement
+      // CHECK-NEXT: store
     }
   }
   return
@@ -53,7 +55,6 @@
 // -----

 // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
-// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>

 // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
 func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
@@ -72,37 +73,18 @@
     // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
     // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
     // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
-    // CHECK-NEXT: {{.*}} = select
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-    // CHECK-NEXT: %[[L0:.*]] = select
-    //
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
-    // CHECK-NEXT: {{.*}} = select
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-    // CHECK-NEXT: %[[L1:.*]] = select
-    //
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
-    // CHECK-NEXT: {{.*}} = select
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-    // CHECK-NEXT: %[[L2:.*]] = select
-    //
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
-    // CHECK-NEXT: {{.*}} = select
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-    // CHECK-NEXT: %[[L3:.*]] = select
-    // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]]
-    //
-    // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref<?x?x?x?xf32>
-    // CHECK-NEXT: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
-    // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
-    // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+    // CHECK: %[[VIDX:.*]] = index_cast %[[I4]]
+    // CHECK: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+    // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
+    // CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
+    // CHECK-NEXT: scf.if
+    // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref<?x?x?x?xf32>
+    // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
+    // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+    // CHECK-NEXT: } else {
+    // CHECK-NEXT: %[[CVEC:.*]] = vector.insertelement
+    // CHECK-NEXT: store %[[CVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+    // CHECK-NEXT: }
    // CHECK-NEXT: }
    // CHECK-NEXT: }
    // CHECK-NEXT: }
@@ -132,7 +114,6 @@
 // -----

 // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
-// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>

 // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
 func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
@@ -153,37 +134,15 @@
     // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
     // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
     // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
-    // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-    // CHECK-NEXT: %[[S0:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-    //
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
-    // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-    // CHECK-NEXT: %[[S1:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-    //
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %{{.*}} : index
-    // CHECK-NEXT: {{.*}} = select {{.*}}, %[[I2]], {{.*}} : index
-    // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %[[C0]] : index
-    // CHECK-NEXT: %[[S2:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-    //
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
-    // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
-    // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
-    // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-    // CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-    // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]]
-    //
-    // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
-    // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
-    // CHECK-NEXT: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref<?x?x?x?xf32>
+    // CHECK: %[[VIDX:.*]] = index_cast %[[I4]]
+    // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
+    // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
+    // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
+    // CHECK-NEXT: scf.if
+    // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+    // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
+    // CHECK: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref<?x?x?x?xf32>
+    // CHECK-NEXT: }
    // CHECK-NEXT: }
    // CHECK-NEXT: }
    // CHECK-NEXT: }
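
Note (not part of the patch itself): to make the effect of the change concrete, the sketch below shows roughly the per-element IR that the new lowering produces for the materialize_read case above, mirroring the CHECK lines in vector-to-loops.mlir. It is a hand-written illustration, not actual compiler output; the SSA value names, the %pad padding value, and the loop indices are placeholders, and in practice this body sits inside the surrounding scf.for nest.

  // The staging vector is loaded once, before the bounds check.
  %vec = load %alloc[%i6, %i5] : memref<5x4xvector<3xf32>>
  scf.if %in_bounds {
    // In bounds: read the scalar from the source memref and insert it.
    %scal = load %src[%l0, %i1, %i2, %l3] : memref<?x?x?x?xf32>
    %rvec = vector.insertelement %scal, %vec[%vidx : i32] : vector<3xf32>
    store %rvec, %alloc[%i6, %i5] : memref<5x4xvector<3xf32>>
  } else {
    // Out of bounds: insert the transfer's padding value instead of clipping
    // the access to the memref boundary, as the removed clip() helper did.
    %cvec = vector.insertelement %pad, %vec[%vidx : i32] : vector<3xf32>
    store %cvec, %alloc[%i6, %i5] : memref<5x4xvector<3xf32>>
  }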