diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -560,6 +560,12 @@
     paddingValues.assign(pv.begin(), pv.end());
     return *this;
   }
+  /// A list of iterator dimensions to pad.
+  SmallVector<int64_t> paddingDimensions;
+  LinalgPaddingOptions &setPaddingDimensions(ArrayRef<int64_t> pd) {
+    paddingDimensions.assign(pd.begin(), pd.end());
+    return *this;
+  }
   /// A flag for every operand to mark the PadOp as nofold which enables packing
   /// for statically shaped operands.
   SmallVector<bool> packPaddings;
@@ -1217,13 +1223,15 @@
                                 PatternRewriter &rewriter) const override;
 };
 
-/// Pad the operands of `opToPad` to a static bounding box. Use `paddingValues`
-/// and `packPaddings` to set the padding value and the nofold attribute of the
-/// introduced tensor::PadOps, respectively. Update `paddedOp` to the cloned
-/// statically shaped operation and return the extracted dynamically shaped
-/// results. If padding fails, return failure.
+/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands
+/// to a static bounding box. Use `paddingValues` and `packPaddings` to set the
+/// padding value and the nofold attribute of the created tensor::PadOps,
+/// respectively. Update `paddedOp` to the cloned operation with statically
+/// shaped `paddingDimensions` and return the extracted dynamically shaped
+/// results. If padding fails, return failure.
 FailureOr<SmallVector<Value>>
 rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
+                  ArrayRef<int64_t> paddingDimensions,
                   ArrayRef<Attribute> paddingValues,
                   ArrayRef<bool> packPaddings, LinalgOp &paddedOp);
 
diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
@@ -15,11 +15,11 @@
 namespace tensor {
 
 // Return a PadOp that pads `source` to `type` size where the static
-// sizes are assumed to be greater than the dynamic sizes. The op performs
-// "high" padding (i.e. it adds trailing padding values until the desired
-// size is met).
-PadOp createPadHighOp(Type type, Value source, Value pad, bool nofold,
-                      Location loc, OpBuilder &builder);
+// sizes are assumed to be greater than the dynamic sizes. If `type` has dynamic
+// dimensions, the padding width is set to zero. The op performs "high" padding
+// (i.e. it adds trailing padding values until the desired size is met).
+PadOp createPadHighOp(RankedTensorType type, Value source, Value pad,
+                      bool nofold, Location loc, OpBuilder &builder);
 
 // Return a PadOp that pads `source to `type` size with `pad` value.
 // I.e., a block will be created and the `pad` value will be yielded
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -158,29 +158,41 @@
   return *this;
 }
 
-/// Pad `opOperand` using the provided `paddingValues`. Exit early for scalar
-/// operands, if `paddingValues` contains no value for the `opOperand`, or if
-/// `opOperand` is not defined by an ExtractSliceOp. Otherwise, try to pad the
-/// operand even if it already has a static shape. Set `result` to the result of
-/// the created tensor::PadOp or and return success if the operand either has
-/// been padded to a static shape or already had a static shape and failure
-/// otherwise.
-static LogicalResult padOperandToSmallestStaticBoundingBox(
+/// Pad the `opOperand` in the `paddingDimensions` using the padding value and
+/// the nofold flag found in `paddingValues` and `packPaddings`, respectively.
+/// Exit early and return the `opOperand` value if the shape dimensions that
+/// match `paddingDimensions` have a static size and the nofold flag is not
+/// set. Otherwise, try to pad the shape dimensions that match the iterator
+/// dimensions `paddingDimensions` and return the tensor::PadOp result if
+/// padding succeeds or failure otherwise.
+static FailureOr<Value> padOperandToSmallestStaticBoundingBox(
     OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand,
-    ArrayRef<Attribute> paddingValues, ArrayRef<bool> packPaddings,
-    Value &result) {
-  // Get the shape of the operand and check if it has a dynamic shape. Only
-  // return failure if the operand is not a scalar and has a dynamic shape.
+    ArrayRef<int64_t> paddingDimensions, ArrayRef<Attribute> paddingValues,
+    ArrayRef<bool> packPaddings) {
+  AffineMap indexingMap = opToPad.getTiedIndexingMap(opOperand);
   ArrayRef<int64_t> shape = opToPad.getShape(opOperand);
-  bool hasDynamicShape = llvm::is_contained(shape, ShapedType::kDynamicSize);
 
-  // Cannot pad scalar operands.
-  if (shape.empty())
-    return success();
+  // Collect the shape dimensions that are a function of `paddingDimensions`.
+  llvm::SmallDenseSet<int64_t> shapeDimsToPad;
+  for (int64_t dim : paddingDimensions)
+    for (const auto &en : enumerate(indexingMap.getResults()))
+      if (en.value().isFunctionOfDim(dim))
+        shapeDimsToPad.insert(en.index());
 
-  // Cannot pad if the padding value is unknown.
+  // Return the unpadded operand if padding to a static shape is not needed and
+  // if the nofold flag is not set.
+  bool nofold = opOperand->getOperandNumber() < packPaddings.size()
+                    ? packPaddings[opOperand->getOperandNumber()]
+                    : false;
+  bool hasStaticShape = llvm::none_of(shapeDimsToPad, [&](int64_t dim) {
+    return ShapedType::isDynamic(shape[dim]);
+  });
+  if (!nofold && hasStaticShape)
+    return opOperand->get();
+
+  // Fail if `paddingValues` specifies no padding value.
   if (opOperand->getOperandNumber() >= paddingValues.size())
-    return failure(hasDynamicShape);
+    return failure();
   Attribute paddingAttr = paddingValues[opOperand->getOperandNumber()];
   Value paddingValue = b.create<arith::ConstantOp>(
       opToPad.getLoc(), paddingAttr.getType(), paddingAttr);
@@ -192,27 +204,31 @@
     currOpOperand = linalgOp.getOutputOperand(result.getResultNumber());
   }
 
-  // Cannot construct a static bounding box if the `currOpOperand` is not
-  // defined by an ExtractSliceOp.
+  // Fail if `currOpOperand` is not defined by an ExtractSliceOp.
   auto sliceOp = currOpOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
   if (!sliceOp)
-    return failure(hasDynamicShape);
+    return failure();
 
   // Compute the dropped dimensions if `sliceOp` is ranke-reducing.
   llvm::SmallBitVector droppedDims = sliceOp.getDroppedDims();
+  OffsetSizeAndStrideOpInterface shapedOp = sliceOp;
 
   // Upper bound the `sliceOp` sizes to obtain a static bounding box.
-  SmallVector<int64_t> staticSizes;
-  staticSizes.reserve(shape.size());
-  auto shapedOp = cast<OffsetSizeAndStrideOpInterface>(sliceOp.getOperation());
+  SmallVector<int64_t> paddedShape(shape.begin(), shape.end());
+  int64_t shapeIdx = 0;
   for (const auto &en : enumerate(shapedOp.getMixedSizes())) {
     // Skip dropped dimensions.
     if (droppedDims.test(en.index()))
       continue;
-    // If the size is an attribute add it directly to `staticSizes`.
+    // Skip dimensions that do not require padding.
+    if (!shapeDimsToPad.contains(shapeIdx)) {
+      shapeIdx++;
+      continue;
+    }
+    // If the size is an attribute add it directly to `paddedShape`.
     if (en.value().is<Attribute>()) {
-      staticSizes.push_back(
-          en.value().get<Attribute>().dyn_cast<IntegerAttr>().getInt());
+      paddedShape[shapeIdx++] =
+          en.value().get<Attribute>().dyn_cast<IntegerAttr>().getInt();
       continue;
     }
     // Otherwise, try to compute a constant upper bound for the size value.
@@ -222,24 +238,21 @@
       LLVM_DEBUG(DBGS() << "No constant bounding box can be found for padding");
       return failure();
     }
-    staticSizes.push_back(upperBound.getValue());
+    paddedShape[shapeIdx++] = upperBound.getValue();
   }
-  assert(staticSizes.size() == shape.size() &&
+  assert(shapeIdx == static_cast<int64_t>(shape.size()) &&
          "expect the dynamic and static ranks to match");
 
-  // Pad the operand to the bounding box defined by `staticSizes`.
-  auto staticTensorType = RankedTensorType::get(
-      staticSizes, getElementTypeOrSelf(opOperand->get()));
-  bool nofold = opOperand->getOperandNumber() < packPaddings.size()
-                    ? packPaddings[opOperand->getOperandNumber()]
-                    : false;
-  result = makeComposedPadHighOp(b, opToPad->getLoc(), staticTensorType,
-                                 opOperand->get(), paddingValue, nofold);
-  return success();
+  // Pad the operand to the bounding box defined by `paddedShape`.
+  auto paddedTensorType = RankedTensorType::get(
+      paddedShape, getElementTypeOrSelf(opOperand->get()));
+  return makeComposedPadHighOp(b, opToPad->getLoc(), paddedTensorType,
+                               opOperand->get(), paddingValue, nofold);
 }
 
 FailureOr<SmallVector<Value>>
 linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
+                          ArrayRef<int64_t> paddingDimensions,
                           ArrayRef<Attribute> paddingValues,
                           ArrayRef<bool> packPaddings, LinalgOp &paddedOp) {
   Location loc = opToPad->getLoc();
@@ -255,13 +268,12 @@
   SmallVector<Value> newOperands;
   newOperands.reserve(opToPad.getNumInputsAndOutputs());
   for (OpOperand *opOperand : opToPad.getInputAndOutputOperands()) {
-    Value paddedOperand;
-    // If padding was requested but the shape cannot be bounded statically then
-    // the pattern fails to apply.
-    if (failed(padOperandToSmallestStaticBoundingBox(
-            b, opToPad, opOperand, paddingValues, packPaddings, paddedOperand)))
+    FailureOr<Value> paddedOperand = padOperandToSmallestStaticBoundingBox(
+        b, opToPad, opOperand, paddingDimensions, paddingValues, packPaddings);
+    // Exit if `paddingDimensions` cannot be bounded statically.
+    if (failed(paddedOperand))
       return failure();
-    newOperands.push_back(paddedOperand ? paddedOperand : opOperand->get());
+    newOperands.push_back(*paddedOperand);
   }
 
   SmallVector<SmallVector<Value>> reifiedResultShapes;
@@ -502,8 +514,8 @@
   // Pad the operation.
   LinalgOp paddedOp;
   FailureOr<SmallVector<Value>> newResults =
-      rewriteAsPaddedOp(rewriter, linalgOp, options.paddingValues,
-                        options.packPaddings, paddedOp);
+      rewriteAsPaddedOp(rewriter, linalgOp, options.paddingDimensions,
+                        options.paddingValues, options.packPaddings, paddedOp);
   if (failed(newResults))
     return failure();
 
@@ -511,10 +523,16 @@
   for (const auto &en : enumerate(options.hoistPaddings)) {
     if (static_cast<int64_t>(en.index()) >= paddedOp.getNumInputsAndOutputs())
       break;
-    OpOperand &opOperand = paddedOp->getOpOperand(en.index());
-    auto padOp = opOperand.get().getDefiningOp<tensor::PadOp>();
+    OpOperand *opOperand = &paddedOp->getOpOperand(en.index());
+    auto padOp = opOperand->get().getDefiningOp<tensor::PadOp>();
     if (!padOp || en.value() == 0)
       continue;
+
+    // Fail hoisting if the operand shape is not fully static.
+    if (llvm::any_of(paddedOp.getShape(opOperand),
+                     [](int64_t size) { return ShapedType::isDynamic(size); }))
+      return failure();
+
     tensor::PadOp hoistedOp;
     SmallVector<GenericOp> transposeOps;
     SmallVector<int64_t> transposeVector =
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -322,8 +322,6 @@
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold) {
-  assert(type.hasStaticShape() && "expect tensor type to have static shape");
-
   // Exit if `source` is not defined by an ExtractSliceOp.
   auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
   if (!sliceOp)
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -25,8 +25,8 @@
   auto padTensorOp =
       builder.create<PadOp>(loc, type, source, low, high, nofold);
   int rank = padTensorOp.getResultType().getRank();
-  SmallVector<Type, 4> blockArgTypes(rank, builder.getIndexType());
-  SmallVector<Location, 4> blockArgLocs(rank, loc);
+  SmallVector<Type> blockArgTypes(rank, builder.getIndexType());
+  SmallVector<Location> blockArgLocs(rank, loc);
   auto &region = padTensorOp.region();
   // `builder.createBlock` changes the insertion point within the block. Create
   // a guard to reset the insertion point of the builder after it is destroyed.
@@ -36,19 +36,22 @@
   return padTensorOp;
 }
 
-PadOp mlir::tensor::createPadHighOp(Type type, Value source, Value pad,
-                                    bool nofold, Location loc, OpBuilder &b) {
-  SmallVector<Value> low, high;
-  auto rankedTensorType = type.cast<RankedTensorType>();
-  assert(rankedTensorType.hasStaticShape());
-  for (const auto &en : enumerate(rankedTensorType.getShape())) {
+PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source,
+                                    Value pad, bool nofold, Location loc,
+                                    OpBuilder &b) {
+  auto zero = b.createOrFold<arith::ConstantIndexOp>(loc, 0);
+  SmallVector<Value> low(type.getRank(), zero);
+  SmallVector<Value> high(type.getRank(), zero);
+  for (const auto &en : enumerate(type.getShape())) {
+    // Pad only the static dimensions of the result tensor type.
+    if (ShapedType::isDynamic(en.value()))
+      continue;
+    // Compute the padding width.
     AffineExpr d0;
     bindDims(b.getContext(), d0);
     auto dimOp = b.createOrFold<tensor::DimOp>(loc, source, en.index());
-    Value paddingWidth =
-        makeComposedAffineApply(b, loc, en.value() - d0, {dimOp});
-    high.push_back(paddingWidth);
-    low.push_back(b.createOrFold<arith::ConstantIndexOp>(loc, 0));
+    high[en.index()] =
+        makeComposedAffineApply(b, loc, en.value() - d0, {dimOp}).getResult();
   }
   return createPadScalarOp(type, source, pad, low, high, nofold, loc, b);
 }
diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
--- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir
+++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
@@ -1,9 +1,9 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=2,4,8 vectorize vectorize-contraction-to=matrixintrinsics unroll-vector-transfers=true" -split-input-file | FileCheck %s --check-prefix=CHECK-INTRINSIC
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 promote promote-full-tile-pad register-tile-sizes=2,4,8 vectorize vectorize-contraction-to=outerproduct split-transfers=true unroll-vector-transfers=false" -split-input-file | FileCheck %s --check-prefix=CHECK-OUTER
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 tile-interchange=1,2,0 generalize iterator-interchange=0,2,1" -split-input-file | FileCheck %s --check-prefix=CHECK-INTERCHANGE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad padding-values=0.:f32,0.:f32,0.:f32 pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad padding-values=0.:f32,0.:f32,0.:f32 vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad padding-values=0.:f32,0.:f32,0.:f32 decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
 
 // CHECK-INTRINSIC: func @matmul(
 //      CHECK-OUTER: func @matmul(
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -1,7 +1,8 @@
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=MATMUL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,1.:f32 pack-paddings=1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,0.:f32 pack-paddings=1,0 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,1.:f32 pack-paddings=0,1 padding-dimensions=0,1,2 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,0.:f32 pack-paddings=0,1 padding-dimensions=0,1,2 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32 pack-paddings=1,1,0 padding-dimensions=0,1,2 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1 pack-paddings=1,1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=PARTIAL
 
 // MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 12, 7)>
 // MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
@@ -503,3 +504,34 @@
   %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
   func.return %1 : tensor<1x?x?xf32>
 }
+
+// -----
+
+#map0 = affine_map<()[s0] -> (7, s0)>
+
+// PARTIAL: padding_the_output_dims_only
+func.func @padding_the_output_dims_only(%arg0: tensor<24x12xf32>,
+                                        %arg1: tensor<12x25xf32>,
+                                        %arg2: tensor<24x25xf32>,
+                                        %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+  // PARTIAL-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // PARTIAL-DAG: %[[TS:.*]] = affine.apply
+  %0 = affine.min #map0()[%iv2]
+
+  // Check only the output dimensions of the matmul are padded.
+  // PARTIAL: %[[T0:.*]] = tensor.pad
+  // PARTIAL-SAME: [%[[TS]], %[[C0]]
+  // PARTIAL: %[[T1:.*]] = tensor.pad
+  // PARTIAL-SAME: [%[[C0]], %[[TS]]
+  // PARTIAL: %[[T2:.*]] = tensor.pad
+  // PARTIAL-SAME: [%[[TS]], %[[TS]]
+  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [%0, %0] [1, 1] : tensor<24x12xf32> to tensor<?x?xf32>
+  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, %0] [1, 1] : tensor<12x25xf32> to tensor<?x?xf32>
+  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [%0, %0] [1, 1] : tensor<24x25xf32> to tensor<?x?xf32>
+
+  // PARTIAL: = linalg.matmul ins(%[[T0]], %[[T1]]
+  // PARTIAL-SAME: outs(%[[T2]]
+  %4 = linalg.matmul ins(%1, %2 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [%0, %0] [1, 1] : tensor<?x?xf32> into tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
+}
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
@@ -101,6 +101,10 @@
       *this, "padding-values",
       llvm::cl::desc("Operand padding values parsed by the attribute parser."),
       llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
+  ListOption<int64_t> paddingDimensions{
+      *this, "padding-dimensions",
+      llvm::cl::desc("Operation iterator dimensions to pad."),
+      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
   ListOption<bool> packPaddings{
       *this, "pack-paddings", llvm::cl::desc("Operand packing flags."),
       llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
@@ -267,6 +271,8 @@
 
     LinalgPaddingOptions paddingOptions;
     paddingOptions.setPaddingValues(paddingValueAttributes);
+    paddingOptions.setPaddingDimensions(
+        SmallVector<int64_t>{paddingDimensions.begin(), paddingDimensions.end()});
     paddingOptions.setPackPaddings(
         SmallVector<bool>{packPaddings.begin(), packPaddings.end()});
     paddingOptions.setHoistPaddings(