diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -1606,8 +1606,8 @@ reference front() { return Storage.front(); } const_reference front() const { return Storage.front(); } - operator std::vector<DataType>&() { return Storage; } - operator ArrayRef<DataType>() { return Storage; } + operator std::vector<DataType> &() { return Storage; } + operator ArrayRef<DataType>() const { return Storage; } std::vector<DataType> *operator&() { return &Storage; } const std::vector<DataType> *operator&() const { return &Storage; } diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -59,7 +59,7 @@ /// and no callback is provided, anything passed from the command-line (if at /// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor). std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass( - int unrollFactor = -1, int unrollFull = -1, + int unrollFactor = -1, bool unrollFull = false, const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr); /// Creates a loop unroll jam pass to unroll jam by the specified factor. 
A diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -18,6 +18,28 @@ def AffineDataCopyGeneration : FunctionPass<"affine-data-copy-generate"> { let summary = "Generate explicit copying for affine memory operations"; let constructor = "mlir::createAffineDataCopyGenerationPass()"; + let options = [ + Option<"fastMemoryCapacity", "fast-mem-capacity", "uint64_t", + /*default=*/"std::numeric_limits<uint64_t>::max()", + "Set fast memory space capacity in KiB (default: unlimited)">, + Option<"fastMemorySpace", "fast-mem-space", "unsigned", + /*default=*/"1", + "Fast memory space identifier for copy generation (default: 1)">, + Option<"generateDma", "generate-dma", "bool", + /*default=*/"true", "Generate DMA instead of point-wise copy">, + Option<"minDmaTransferSize", "min-dma-transfer", "int", + /*default=*/"1024", + "Minimum DMA transfer size supported by the target in bytes">, + Option<"slowMemorySpace", "slow-mem-space", "unsigned", + /*default=*/"0", + "Slow memory space identifier for copy generation (default: 0)">, + Option<"skipNonUnitStrideLoops", "skip-non-unit-stride-loops", "bool", + /*default=*/"false", "Testing purposes: avoid non-unit stride loop " + "choice depths for copy placement">, + Option<"tagMemorySpace", "tag-mem-space", "unsigned", + /*default=*/"0", + "Tag memory space identifier for copy generation (default: 0)">, + ]; } def AffineLoopInvariantCodeMotion @@ -29,16 +51,44 @@ def AffineLoopTiling : FunctionPass<"affine-loop-tile"> { let summary = "Tile affine loop nests"; let constructor = "mlir::createLoopTilingPass()"; + let options = [ + Option<"cacheSizeInKiB", "cache-size", "uint64_t", /*default=*/"512", + "Set size of cache to tile for in KiB">, + Option<"separate", "separate", "bool", /*default=*/"", + "Separate full and partial tiles">, + Option<"tileSize", "tile-size", "unsigned", 
/*default=*/"", + "Use this tile size for all loops">, + ListOption<"tileSizes", "tile-sizes", "unsigned", + "List of tile sizes for each perfect nest " + "(overridden by -tile-size)", + "llvm::cl::ZeroOrMore">, + ]; } def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> { let summary = "Unroll affine loops"; let constructor = "mlir::createLoopUnrollPass()"; + let options = [ + Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4", + "Use this unroll factor for all loops being unrolled">, + Option<"unrollFull", "unroll-full", "bool", /*default=*/"false", + "Fully unroll loops">, + Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1", + "Unroll innermost loops repeatedly this many times">, + Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned", + /*default=*/"1", + "Unroll all loops with trip count less than or equal to this">, + ]; } def AffineLoopUnrollAndJam : FunctionPass<"affine-loop-unroll-jam"> { let summary = "Unroll and jam affine loops"; let constructor = "mlir::createLoopUnrollAndJamPass()"; + let options = [ + Option<"unrollJamFactor", "unroll-jam-factor", "unsigned", + /*default=*/"4", + "Use this unroll jam factor for all loops (default 4)">, + ]; } def AffineVectorize : FunctionPass<"affine-super-vectorize"> { diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -514,6 +514,9 @@ /// Return if the given ElementsAttr should be elided. bool shouldElideElementsAttr(ElementsAttr attr) const; + /// Return the size limit for printing large ElementsAttr. + Optional<int64_t> getLargeElementsAttrLimit() const; + /// Return if debug information should be printed. 
bool shouldPrintDebugInfo() const; diff --git a/mlir/include/mlir/Pass/PassOptions.h b/mlir/include/mlir/Pass/PassOptions.h --- a/mlir/include/mlir/Pass/PassOptions.h +++ b/mlir/include/mlir/Pass/PassOptions.h @@ -42,6 +42,9 @@ /// Return the argument string of this option. StringRef getArgStr() const { return getOption()->ArgStr; } + /// Returns true if this option has any value assigned to it. + bool hasValue() const { return optHasValue; } + protected: /// Return the main option instance. virtual const llvm::cl::Option *getOption() const = 0; @@ -49,6 +52,9 @@ /// Copy the value from the given option into this one. virtual void copyValueFrom(const OptionBase &other) = 0; + /// Flag indicating if this option has a value. + bool optHasValue = false; + /// Allow access to private methods. friend PassOptions; }; @@ -113,10 +119,17 @@ assert(!this->isPositional() && !this->isSink() && "sink and positional options are not supported"); parent.options.push_back(this); + + // Set a callback to track if this option has a value. + this->setCallback([this](const auto &) { this->optHasValue = true; }); } + ~Option() override = default; using llvm::cl::opt<DataType, /*ExternalStorage=*/false, OptionParser>::operator=; - ~Option() override = default; + Option &operator=(const Option &other) { + *this = other.getValue(); + return *this; + } private: /// Return the main option instance. @@ -132,6 +145,7 @@ void copyValueFrom(const OptionBase &other) final { this->setValue(static_cast<const Option<DataType, OptionParser> &>(other) .getValue()); + optHasValue = other.optHasValue; } }; @@ -149,16 +163,26 @@ assert(!this->isPositional() && !this->isSink() && "sink and positional options are not supported"); parent.options.push_back(this); + + // Set a callback to track if this option has a value. 
+ this->setCallback([this](const auto &) { this->optHasValue = true; }); } ~ListOption() override = default; + ListOption<DataType, OptionParser> & + operator=(const ListOption<DataType, OptionParser> &other) { + *this = ArrayRef<DataType>(other); + this->optHasValue = other.optHasValue; + return *this; + } /// Allow assigning from an ArrayRef. ListOption<DataType, OptionParser> &operator=(ArrayRef<DataType> values) { - (*this)->assign(values.begin(), values.end()); + ((std::vector<DataType> &)*this).assign(values.begin(), values.end()); + optHasValue = true; return *this; } - std::vector<DataType> *operator->() { return &*this; } + MutableArrayRef<DataType> operator->() const { return &*this; } private: /// Return the main option instance. @@ -175,9 +199,7 @@ /// Copy the value from the given option into this one. void copyValueFrom(const OptionBase &other) final { - (*this) = ArrayRef<DataType>( - (ListOption<DataType, OptionParser> &)(const_cast<OptionBase &>( - other))); + *this = static_cast<const ListOption<DataType, OptionParser> &>(other); } }; diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -15,6 +15,24 @@ include "mlir/Pass/PassBase.td" +def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> { + let summary = "Fuse affine loop nests"; + let constructor = "mlir::createLoopFusionPass()"; + let options = [ + Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double", + /*default=*/"0.30f", "Fractional increase in additional computation " + "tolerated while fusing">, + Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned", + /*default=*/"0", + "Faster memory space number to promote fusion buffers to">, + Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t", + /*default=*/"0", "Threshold size (KiB) for promoting local buffers " + "to fast memory space">, + Option<"maximalFusion", 
"fusion-maximal", "bool", /*default=*/"false", + "Enables maximal loop fusion">, + ]; +} + def AffinePipelineDataTransfer : FunctionPass<"affine-pipeline-data-transfer"> { let summary = "Pipeline non-blocking data transfers between explicitly " @@ -84,11 +102,6 @@ let constructor = "mlir::createPipelineDataTransferPass()"; } -def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> { - let summary = "Fuse affine loop nests"; - let constructor = "mlir::createLoopFusionPass()"; -} - def Canonicalizer : Pass<"canonicalize"> { let summary = "Canonicalize operations"; let constructor = "mlir::createCanonicalizerPass()"; @@ -106,6 +119,14 @@ def Inliner : Pass<"inline"> { let summary = "Inline function calls"; let constructor = "mlir::createInlinerPass()"; + let options = [ + Option<"disableCanonicalization", "disable-simplify", "bool", + /*default=*/"false", + "Disable running simplifications during inlining">, + Option<"maxInliningIterations", "max-iterations", "unsigned", + /*default=*/"4", + "Maximum number of iterations when inlining within an SCC">, + ]; } def LocationSnapshot : Pass<"snapshot-op-locations"> { @@ -113,7 +134,7 @@ let constructor = "mlir::createLocationSnapshotPass()"; let options = [ Option<"fileName", "filename", "std::string", /*default=*/"", - "The filename to print the generated IR.">, + "The filename to print the generated IR">, Option<"tag", "tag", "std::string", /*default=*/"", "A tag to use when fusing the new locations with the " "original. 
If unset, the locations are replaced.">, diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp @@ -61,8 +61,8 @@ ImperfectlyNestedForLoopMapper() = default; ImperfectlyNestedForLoopMapper(ArrayRef<int64_t> numWorkGroups, ArrayRef<int64_t> workGroupSize) { - this->numWorkGroups->assign(numWorkGroups.begin(), numWorkGroups.end()); - this->workGroupSize->assign(workGroupSize.begin(), workGroupSize.end()); + this->numWorkGroups = numWorkGroups; + this->workGroupSize = workGroupSize; } void runOnFunction() override { diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp --- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp @@ -35,32 +35,6 @@ using namespace mlir; -static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); - -static llvm::cl::opt<unsigned long long> clFastMemoryCapacity( - "affine-data-copy-generate-fast-mem-capacity", - llvm::cl::desc( - "Set fast memory space capacity in KiB (default: unlimited)"), - llvm::cl::cat(clOptionsCategory)); - -static llvm::cl::opt<bool> - clDma("affine-data-copy-generate-dma", - llvm::cl::desc("Generate DMA instead of point-wise copy"), - llvm::cl::cat(clOptionsCategory), llvm::cl::init(true)); - -static llvm::cl::opt<unsigned> clFastMemorySpace( - "affine-data-copy-generate-fast-mem-space", llvm::cl::init(1), - llvm::cl::desc( - "Fast memory space identifier for copy generation (default: 1)"), - llvm::cl::cat(clOptionsCategory)); - -static llvm::cl::opt<bool> clSkipNonUnitStrideLoop( - "affine-data-copy-generate-skip-non-unit-stride-loops", llvm::cl::Hidden, - llvm::cl::init(false), - llvm::cl::desc("Testing purposes: avoid non-unit stride loop choice depths " - "for 
copy placement"), - llvm::cl::cat(clOptionsCategory)); - namespace { /// Replaces all loads and stores on memref's living in 'slowMemorySpace' by @@ -76,51 +50,22 @@ // are strided. Check for strided stores. struct AffineDataCopyGeneration : public AffineDataCopyGenerationBase<AffineDataCopyGeneration> { - explicit AffineDataCopyGeneration( - unsigned slowMemorySpace = 0, - unsigned fastMemorySpace = clFastMemorySpace, unsigned tagMemorySpace = 0, - int minDmaTransferSize = 1024, - uint64_t fastMemCapacityBytes = - (clFastMemoryCapacity.getNumOccurrences() > 0 - ? clFastMemoryCapacity * 1024 // cl-provided size is in KiB - : std::numeric_limits<uint64_t>::max()), - bool generateDma = clDma, - bool skipNonUnitStrideLoops = clSkipNonUnitStrideLoop) - : slowMemorySpace(slowMemorySpace), fastMemorySpace(fastMemorySpace), - tagMemorySpace(tagMemorySpace), minDmaTransferSize(minDmaTransferSize), - fastMemCapacityBytes(fastMemCapacityBytes), generateDma(generateDma), - skipNonUnitStrideLoops(skipNonUnitStrideLoops) {} - - explicit AffineDataCopyGeneration(const AffineDataCopyGeneration &other) - : AffineDataCopyGenerationBase<AffineDataCopyGeneration>(other), - slowMemorySpace(other.slowMemorySpace), - fastMemorySpace(other.fastMemorySpace), - tagMemorySpace(other.tagMemorySpace), - minDmaTransferSize(other.minDmaTransferSize), - fastMemCapacityBytes(other.fastMemCapacityBytes), - generateDma(other.generateDma), - skipNonUnitStrideLoops(other.skipNonUnitStrideLoops) {} + AffineDataCopyGeneration() = default; + explicit AffineDataCopyGeneration(unsigned slowMemorySpace, + unsigned fastMemorySpace, + unsigned tagMemorySpace, + int minDmaTransferSize, + uint64_t fastMemCapacityBytes) { + this->slowMemorySpace = slowMemorySpace; + this->fastMemorySpace = fastMemorySpace; + this->tagMemorySpace = tagMemorySpace; + this->minDmaTransferSize = minDmaTransferSize; + this->fastMemoryCapacity = fastMemCapacityBytes / 1024; + } void runOnFunction() override; LogicalResult 
runOnBlock(Block *block, DenseSet<Operation *> &copyNests); - // Slow memory space associated with copies. - const unsigned slowMemorySpace; - // Fast memory space associated with copies. - unsigned fastMemorySpace; - // Memory space associated with DMA tags. - unsigned tagMemorySpace; - // Minimum DMA transfer size supported by the target in bytes. - const int minDmaTransferSize; - // Capacity of the faster memory space. - uint64_t fastMemCapacityBytes; - - // If set, generate DMA operations instead of read/write. - bool generateDma; - - // If set, ignore loops with steps other than 1. - bool skipNonUnitStrideLoops; - // Constant zero index to avoid too many duplicates. Value zeroIndex = nullptr; }; @@ -153,6 +98,10 @@ if (block->empty()) return success(); + uint64_t fastMemCapacityBytes = + fastMemoryCapacity != std::numeric_limits<uint64_t>::max() + ? fastMemoryCapacity * 1024 + : fastMemoryCapacity; AffineCopyOptions copyOptions = {generateDma, slowMemorySpace, fastMemorySpace, tagMemorySpace, fastMemCapacityBytes}; diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp --- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp @@ -28,40 +28,15 @@ #define DEBUG_TYPE "affine-loop-tile" -static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); - -static llvm::cl::opt<unsigned long long> - clCacheSizeKiB("affine-tile-cache-size", - llvm::cl::desc("Set size of cache to tile for in KiB"), - llvm::cl::cat(clOptionsCategory)); - -// Separate full and partial tiles. -static llvm::cl::opt<bool> - clSeparate("affine-tile-separate", - llvm::cl::desc("Separate full and partial tiles"), - llvm::cl::cat(clOptionsCategory)); - -// Tile size to use for all loops (overrides -tile-sizes if provided). 
-static llvm::cl::opt<unsigned> - clTileSize("affine-tile-size", - llvm::cl::desc("Use this tile size for all loops"), - llvm::cl::cat(clOptionsCategory)); - -// List of tile sizes. If any of them aren't provided, they are filled with -// clTileSize / kDefaultTileSize. -static llvm::cl::list<unsigned> clTileSizes( - "affine-tile-sizes", - llvm::cl::desc( - "List of tile sizes for each perfect nest (overridden by -tile-size)"), - llvm::cl::ZeroOrMore, llvm::cl::cat(clOptionsCategory)); - namespace { /// A pass to perform loop tiling on all suitable loop nests of a Function. struct LoopTiling : public AffineLoopTilingBase<LoopTiling> { - explicit LoopTiling(uint64_t cacheSizeBytes = kDefaultCacheMemCapacity, - bool avoidMaxMinBounds = true) - : cacheSizeBytes(cacheSizeBytes), avoidMaxMinBounds(avoidMaxMinBounds) {} + LoopTiling() = default; + explicit LoopTiling(uint64_t cacheSizeBytes, bool avoidMaxMinBounds = true) + : avoidMaxMinBounds(avoidMaxMinBounds) { + this->cacheSizeInKiB = cacheSizeBytes / 1024; + } void runOnFunction() override; void getTileSizes(ArrayRef<AffineForOp> band, @@ -69,12 +44,9 @@ // Default tile size if nothing is provided. constexpr static unsigned kDefaultTileSize = 4; - constexpr static uint64_t kDefaultCacheMemCapacity = 512 * 1024UL; - // Capacity of the cache to tile for. - uint64_t cacheSizeBytes; // If true, tile sizes are set to avoid max/min in bounds if possible. - bool avoidMaxMinBounds; + bool avoidMaxMinBounds = true; }; } // end anonymous namespace @@ -316,23 +288,19 @@ if (band.empty()) return; - tileSizes->resize(band.size()); - - // Use clTileSize for all loops if specified. - if (clTileSize.getNumOccurrences() > 0) { - std::fill(tileSizes->begin(), tileSizes->end(), clTileSize); + // Use tileSize for all loops if specified. + if (tileSize.hasValue()) { + tileSizes->assign(band.size(), tileSize); return; } - // Use clTileSizes and fill them with default tile size if it's short. 
- if (!clTileSizes.empty()) { - std::fill(tileSizes->begin(), tileSizes->end(), - LoopTiling::kDefaultTileSize); - std::copy(clTileSizes.begin(), - clTileSizes.begin() + std::min(clTileSizes.size(), band.size()), - tileSizes->begin()); + // Use tileSizes and fill them with default tile size if it's short. + if (!this->tileSizes.empty()) { + tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end()); + tileSizes->resize(band.size(), kDefaultTileSize); return; } + tileSizes->resize(band.size()); // The first loop in the band. auto rootForOp = band[0]; @@ -356,6 +324,7 @@ } // Check how many times larger the cache size is when compared to footprint. + uint64_t cacheSizeBytes = cacheSizeInKiB * 1024; uint64_t excessFactor = llvm::divideCeil(fp.getValue(), cacheSizeBytes); if (excessFactor <= 1) { // No need of any tiling - set tile size to 1. @@ -388,10 +357,6 @@ } void LoopTiling::runOnFunction() { - // Override cache size if provided on command line. - if (clCacheSizeKiB.getNumOccurrences() > 0) - cacheSizeBytes = clCacheSizeKiB * 1024; - // Bands of loops to tile. std::vector<SmallVector<AffineForOp, 6>> bands; getTileableBands(getFunction(), &bands); @@ -399,7 +364,7 @@ // Tile each band. for (auto &band : bands) { // Set up tile sizes; fill missing tile sizes at the end with default tile - // size or clTileSize if one was provided. + // size or tileSize if one was provided. SmallVector<unsigned, 6> tileSizes; getTileSizes(band, &tileSizes); if (llvm::DebugFlag) { @@ -413,7 +378,7 @@ return signalPassFailure(); // Separate full and partial tiles. 
- if (clSeparate) { + if (separate) { auto intraTileLoops = MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size()); separateFullTiles(intraTileLoops); @@ -422,4 +387,3 @@ } constexpr unsigned LoopTiling::kDefaultTileSize; -constexpr uint64_t LoopTiling::kDefaultCacheMemCapacity; diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp @@ -59,24 +59,27 @@ /// with trip count less than the specified threshold. The latter is for testing /// purposes, especially for testing outer loop unrolling. struct LoopUnroll : public AffineLoopUnrollBase<LoopUnroll> { - const Optional<unsigned> unrollFactor; - const Optional<bool> unrollFull; // Callback to obtain unroll factors; if this has a callable target, takes // precedence over command-line argument or passed argument. const std::function<unsigned(AffineForOp)> getUnrollFactor; + LoopUnroll() : getUnrollFactor(nullptr) {} + LoopUnroll(const LoopUnroll &other) + : AffineLoopUnrollBase<LoopUnroll>(other), + getUnrollFactor(other.getUnrollFactor) {} explicit LoopUnroll( - Optional<unsigned> unrollFactor = None, Optional<bool> unrollFull = None, + Optional<unsigned> unrollFactor = None, bool unrollFull = false, const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr) - : unrollFactor(unrollFactor), unrollFull(unrollFull), - getUnrollFactor(getUnrollFactor) {} + : getUnrollFactor(getUnrollFactor) { + if (unrollFactor) + this->unrollFactor = *unrollFactor; + this->unrollFull = unrollFull; + } void runOnFunction() override; /// Unroll this for op. Returns failure if nothing was done. 
LogicalResult runOnAffineForOp(AffineForOp forOp); - - static const unsigned kDefaultUnrollFactor = 4; }; } // end anonymous namespace @@ -102,8 +105,7 @@ } void LoopUnroll::runOnFunction() { - if (clUnrollFull.getNumOccurrences() > 0 && - clUnrollFullThreshold.getNumOccurrences() > 0) { + if (unrollFull && unrollFullThreshold.hasValue()) { // Store short loops as we walk. SmallVector<AffineForOp, 4> loops; @@ -112,7 +114,7 @@ // an outer one may delete gathered inner ones). getFunction().walk([&](AffineForOp forOp) { Optional<uint64_t> tripCount = getConstantTripCount(forOp); - if (tripCount.hasValue() && tripCount.getValue() <= clUnrollFullThreshold) + if (tripCount.hasValue() && tripCount.getValue() <= unrollFullThreshold) loops.push_back(forOp); }); for (auto forOp : loops) @@ -120,9 +122,6 @@ return; } - unsigned numRepetitions = clUnrollNumRepetitions.getNumOccurrences() > 0 - ? clUnrollNumRepetitions - : 1; // If the call back is provided, we will recurse until no loops are found. FuncOp func = getFunction(); SmallVector<AffineForOp, 4> loops; @@ -144,28 +143,19 @@ /// failure otherwise. The default unroll factor is 4. LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { // Use the function callback if one was provided. - if (getUnrollFactor) { + if (getUnrollFactor) return loopUnrollByFactor(forOp, getUnrollFactor(forOp)); - } - // Unroll by the factor passed, if any. - if (unrollFactor.hasValue()) - return loopUnrollByFactor(forOp, unrollFactor.getValue()); - // Unroll by the command line factor if one was specified. - if (clUnrollFactor.getNumOccurrences() > 0) - return loopUnrollByFactor(forOp, clUnrollFactor); // Unroll completely if full loop unroll was specified. - if (clUnrollFull.getNumOccurrences() > 0 || - (unrollFull.hasValue() && unrollFull.getValue())) + if (unrollFull) return loopUnrollFull(forOp); - - // Unroll by four otherwise. 
- return loopUnrollByFactor(forOp, kDefaultUnrollFactor); + // Otherwise, unroll by the given unroll factor. + return loopUnrollByFactor(forOp, unrollFactor); } std::unique_ptr<OperationPass<FuncOp>> mlir::createLoopUnrollPass( - int unrollFactor, int unrollFull, + int unrollFactor, bool unrollFull, const std::function<unsigned(AffineForOp)> &getUnrollFactor) { return std::make_unique<LoopUnroll>( - unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor), - unrollFull == -1 ? None : Optional<bool>(unrollFull), getUnrollFactor); + unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor), unrollFull, + getUnrollFactor); } diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp @@ -49,27 +49,16 @@ #define DEBUG_TYPE "affine-loop-unroll-jam" -static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); - -// Loop unroll and jam factor. -static llvm::cl::opt<unsigned> - clUnrollJamFactor("unroll-jam-factor", llvm::cl::Hidden, - llvm::cl::desc("Use this unroll jam factor for all loops" - " (default 4)"), - llvm::cl::cat(clOptionsCategory)); - namespace { /// Loop unroll jam pass. Currently, this just unroll jams the first /// outer loop in a Function. struct LoopUnrollAndJam : public AffineLoopUnrollAndJamBase<LoopUnrollAndJam> { - Optional<unsigned> unrollJamFactor; - static const unsigned kDefaultUnrollJamFactor = 4; - - explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None) - : unrollJamFactor(unrollJamFactor) {} + explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None) { + if (unrollJamFactor) + this->unrollJamFactor = *unrollJamFactor; + } void runOnFunction() override; - LogicalResult runOnAffineForOp(AffineForOp forOp); }; } // end anonymous namespace @@ -85,19 +74,5 @@ // any for operation. 
auto &entryBlock = getFunction().front(); if (auto forOp = dyn_cast<AffineForOp>(entryBlock.front())) - runOnAffineForOp(forOp); -} - -/// Unroll and jam a 'affine.for' op. Default unroll jam factor is -/// kDefaultUnrollJamFactor. Return failure if nothing was done. -LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) { - // Unroll and jam by the factor that was passed if any. - if (unrollJamFactor.hasValue()) - return loopUnrollJamByFactor(forOp, unrollJamFactor.getValue()); - // Otherwise, unroll jam by the command-line factor if one was specified. - if (clUnrollJamFactor.getNumOccurrences() > 0) - return loopUnrollJamByFactor(forOp, clUnrollJamFactor); - - // Unroll and jam by four otherwise. - return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor); + loopUnrollJamByFactor(forOp, unrollJamFactor); } diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -582,7 +582,7 @@ } // end anonymous namespace Vectorize::Vectorize(ArrayRef<int64_t> virtualVectorSize) { - vectorSizes->assign(virtualVectorSize.begin(), virtualVectorSize.end()); + vectorSizes = virtualVectorSize; } /////// TODO(ntv): Hoist to a VectorizationStrategy.cpp when appropriate. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -508,9 +508,7 @@ namespace { struct LinalgTilingPass : public LinalgTilingBase<LinalgTilingPass> { LinalgTilingPass() = default; - LinalgTilingPass(ArrayRef<int64_t> sizes) { - tileSizes->assign(sizes.begin(), sizes.end()); - } + LinalgTilingPass(ArrayRef<int64_t> sizes) { tileSizes = sizes; } void runOnFunction() override { tileLinalgOps<loop::ForOp>(getFunction(), tileSizes); @@ -521,7 +519,7 @@ : public LinalgTilingToParallelLoopsBase<LinalgTilingToParallelLoopsPass> { LinalgTilingToParallelLoopsPass() = default; LinalgTilingToParallelLoopsPass(ArrayRef<int64_t> sizes) { - tileSizes->assign(sizes.begin(), sizes.end()); + tileSizes = sizes; } void runOnFunction() override { diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -146,6 +146,11 @@ *elementsAttrElementLimit < int64_t(attr.getNumElements()); } +/// Return the size limit for printing large ElementsAttr. +Optional<int64_t> OpPrintingFlags::getLargeElementsAttrLimit() const { + return elementsAttrElementLimit; +} + /// Return if debug information should be printed. 
bool OpPrintingFlags::shouldPrintDebugInfo() const { return printDebugInfoFlag; diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -27,16 +27,6 @@ using namespace mlir; -static llvm::cl::opt<bool> disableCanonicalization( - "mlir-disable-inline-simplify", - llvm::cl::desc("Disable running simplifications during inlining"), - llvm::cl::ReallyHidden, llvm::cl::init(false)); - -static llvm::cl::opt<unsigned> maxInliningIterations( - "mlir-max-inline-iterations", - llvm::cl::desc("Maximum number of iterations when inlining within an SCC"), - llvm::cl::ReallyHidden, llvm::cl::init(4)); - //===----------------------------------------------------------------------===// // Symbol Use Tracking //===----------------------------------------------------------------------===// @@ -563,13 +553,55 @@ useList.recomputeUses(node, cg); } -/// Attempt to inline calls within the given scc, and run canonicalizations with -/// the given patterns, until a fixed point is reached. This allows for the -/// inlining of newly devirtualized calls. -static void inlineSCC(Inliner &inliner, CGUseList &useList, - MutableArrayRef<CallGraphNode *> currentSCC, - MLIRContext *context, - const OwningRewritePatternList &canonPatterns) { +//===----------------------------------------------------------------------===// +// InlinerPass +//===----------------------------------------------------------------------===// + +namespace { +struct InlinerPass : public InlinerBase<InlinerPass> { + void runOnOperation() override; + + /// Attempt to inline calls within the given scc, and run canonicalizations + /// with the given patterns, until a fixed point is reached. This allows for + /// the inlining of newly devirtualized calls. 
+ void inlineSCC(Inliner &inliner, CGUseList &useList, + MutableArrayRef<CallGraphNode *> currentSCC, + MLIRContext *context, + const OwningRewritePatternList &canonPatterns); +}; +} // end anonymous namespace + +void InlinerPass::runOnOperation() { + CallGraph &cg = getAnalysis<CallGraph>(); + auto *context = &getContext(); + + // The inliner should only be run on operations that define a symbol table, + // as the callgraph will need to resolve references. + Operation *op = getOperation(); + if (!op->hasTrait<OpTrait::SymbolTable>()) { + op->emitOpError() << " was scheduled to run under the inliner, but does " + "not define a symbol table"; + return signalPassFailure(); + } + + // Collect a set of canonicalization patterns to use when simplifying + // callable regions within an SCC. + OwningRewritePatternList canonPatterns; + for (auto *op : context->getRegisteredOperations()) + op->getCanonicalizationPatterns(canonPatterns, context); + + // Run the inline transform in post-order over the SCCs in the callgraph. + Inliner inliner(context, cg); + CGUseList useList(getOperation(), cg); + runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) { + inlineSCC(inliner, useList, scc, context, canonPatterns); + }); +} + +void InlinerPass::inlineSCC(Inliner &inliner, CGUseList &useList, + MutableArrayRef<CallGraphNode *> currentSCC, + MLIRContext *context, + const OwningRewritePatternList &canonPatterns) { // If we successfully inlined any calls, run some simplifications on the // nodes of the scc. Continue attempting to inline until we reach a fixed // point, or a maximum iteration count. 
We canonicalize here as it may @@ -584,41 +616,6 @@ } } -//===----------------------------------------------------------------------===// -// InlinerPass -//===----------------------------------------------------------------------===// - -namespace { -struct InlinerPass : public InlinerBase<InlinerPass> { - void runOnOperation() override { - CallGraph &cg = getAnalysis<CallGraph>(); - auto *context = &getContext(); - - // The inliner should only be run on operations that define a symbol table, - // as the callgraph will need to resolve references. - Operation *op = getOperation(); - if (!op->hasTrait<OpTrait::SymbolTable>()) { - op->emitOpError() << " was scheduled to run under the inliner, but does " - "not define a symbol table"; - return signalPassFailure(); - } - - // Collect a set of canonicalization patterns to use when simplifying - // callable regions within an SCC. - OwningRewritePatternList canonPatterns; - for (auto *op : context->getRegisteredOperations()) - op->getCanonicalizationPatterns(canonPatterns, context); - - // Run the inline transform in post-order over the SCCs in the callgraph. - Inliner inliner(context, cg); - CGUseList useList(getOperation(), cg); - runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) { - inlineSCC(inliner, useList, scc, context, canonPatterns); - }); - } -}; -} // end anonymous namespace - std::unique_ptr<Pass> mlir::createInlinerPass() { return std::make_unique<InlinerPass>(); } diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -37,36 +37,6 @@ using namespace mlir; -static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); - -/// Disables fusion profitability check and fuses if valid. Ignore any -/// additional (redundant) computation tolerance threshold -/// that would have prevented fusion. 
-static llvm::cl::opt<bool> - clMaximalLoopFusion("fusion-maximal", - llvm::cl::desc("Enables maximal loop fusion"), - llvm::cl::cat(clOptionsCategory)); - -/// A threshold in percent of additional computation allowed when fusing. -static llvm::cl::opt<double> clFusionAddlComputeTolerance( - "fusion-compute-tolerance", - llvm::cl::desc("Fractional increase in additional " - "computation tolerated while fusing"), - llvm::cl::cat(clOptionsCategory)); - -static llvm::cl::opt<unsigned> clFusionFastMemorySpace( - "fusion-fast-mem-space", - llvm::cl::desc("Faster memory space number to promote fusion buffers to"), - llvm::cl::cat(clOptionsCategory)); - -// A local buffer of size less than or equal to this size is automatically -// promoted to fast memory after producer-consumer fusion. -static llvm::cl::opt<unsigned long long> clFusionLocalBufThreshold( - "fusion-local-buf-threshold", - llvm::cl::desc("Threshold size (KiB) for promoting local buffers to fast " - "memory space"), - llvm::cl::cat(clOptionsCategory)); - namespace { /// Loop fusion pass. This pass currently supports a greedy fusion policy, /// which fuses loop nests with single-writer/single-reader memref dependences @@ -78,24 +48,15 @@ // and add support for more general loop fusion algorithms. struct LoopFusion : public AffineLoopFusionBase<LoopFusion> { - LoopFusion(unsigned fastMemorySpace = 0, uint64_t localBufSizeThreshold = 0, - bool maximalFusion = false) - : localBufSizeThreshold(localBufSizeThreshold), - fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {} + LoopFusion() = default; + LoopFusion(unsigned fastMemorySpace, uint64_t localBufSizeThresholdBytes, + bool maximalFusion) { + this->fastMemorySpace = fastMemorySpace; + this->localBufSizeThreshold = localBufSizeThresholdBytes / 1024; + this->maximalFusion = maximalFusion; + } void runOnFunction() override; - - // Any local buffers smaller than this size (in bytes) will be created in - // `fastMemorySpace` if provided. 
- uint64_t localBufSizeThreshold; - Optional<unsigned> fastMemorySpace = None; - // If true, ignore any additional (redundant) computation tolerance threshold - // that would have prevented fusion. - bool maximalFusion; - - // The amount of additional computation that is tolerated while fusing - // pair-wise as a fraction of the total computation. - constexpr static double kComputeToleranceThreshold = 0.30f; }; } // end anonymous namespace @@ -1098,7 +1059,8 @@ ArrayRef<Operation *> dstLoadOpInsts, ArrayRef<Operation *> dstStoreOpInsts, ComputationSliceState *sliceState, - unsigned *dstLoopDepth, bool maximalFusion) { + unsigned *dstLoopDepth, bool maximalFusion, + double computeToleranceThreshold) { LLVM_DEBUG({ llvm::dbgs() << "Checking whether fusion is profitable between:\n"; llvm::dbgs() << " " << *srcOpInst << " and \n"; @@ -1247,11 +1209,6 @@ llvm::dbgs() << msg.str(); }); - double computeToleranceThreshold = - clFusionAddlComputeTolerance.getNumOccurrences() > 0 - ? clFusionAddlComputeTolerance - : LoopFusion::kComputeToleranceThreshold; - // TODO(b/123247369): This is a placeholder cost model. // Among all choices that add an acceptable amount of redundant computation // (as per computeToleranceThreshold), we will simply pick the one that @@ -1426,13 +1383,18 @@ // If true, ignore any additional (redundant) computation tolerance threshold // that would have prevented fusion. bool maximalFusion; + // The amount of additional computation that is tolerated while fusing + // pair-wise as a fraction of the total computation. 
+ double computeToleranceThreshold; using Node = MemRefDependenceGraph::Node; GreedyFusion(MemRefDependenceGraph *mdg, unsigned localBufSizeThreshold, - Optional<unsigned> fastMemorySpace, bool maximalFusion) + Optional<unsigned> fastMemorySpace, bool maximalFusion, + double computeToleranceThreshold) : mdg(mdg), localBufSizeThreshold(localBufSizeThreshold), - fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {} + fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion), + computeToleranceThreshold(computeToleranceThreshold) {} // Initializes 'worklist' with nodes from 'mdg' void init() { @@ -1608,7 +1570,8 @@ // Check if fusion would be profitable. if (!isFusionProfitable(srcStoreOp, srcStoreOp, dstLoadOpInsts, dstStoreOpInsts, &sliceState, - &bestDstLoopDepth, maximalFusion)) + &bestDstLoopDepth, maximalFusion, + computeToleranceThreshold)) continue; // Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'. @@ -1769,7 +1732,7 @@ // Check if fusion would be profitable. if (!isFusionProfitable(sibLoadOpInst, sibStoreOpInst, dstLoadOpInsts, dstStoreOpInsts, &sliceState, &bestDstLoopDepth, - maximalFusion)) + maximalFusion, computeToleranceThreshold)) continue; // Fuse computation slice of 'sibLoopNest' into 'dstLoopNest'. @@ -1954,21 +1917,15 @@ } // end anonymous namespace void LoopFusion::runOnFunction() { - // Override if a command line argument was provided. - if (clFusionFastMemorySpace.getNumOccurrences() > 0) { - fastMemorySpace = clFusionFastMemorySpace.getValue(); - } - - // Override if a command line argument was provided. 
- if (clFusionLocalBufThreshold.getNumOccurrences() > 0) { - localBufSizeThreshold = clFusionLocalBufThreshold * 1024; - } - - if (clMaximalLoopFusion.getNumOccurrences() > 0) - maximalFusion = clMaximalLoopFusion; - MemRefDependenceGraph g; - if (g.init(getFunction())) - GreedyFusion(&g, localBufSizeThreshold, fastMemorySpace, maximalFusion) - .run(); + if (!g.init(getFunction())) + return; + + Optional<unsigned> fastMemorySpaceOpt; + if (fastMemorySpace.hasValue()) + fastMemorySpaceOpt = fastMemorySpace; + unsigned localBufSizeThresholdBytes = localBufSizeThreshold * 1024; + GreedyFusion fusion(&g, localBufSizeThresholdBytes, fastMemorySpaceOpt, + maximalFusion, computeToleranceThreshold); + fusion.run(); } diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp --- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp +++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp @@ -23,13 +23,10 @@ #define DEBUG_TYPE "pattern-matcher" -static llvm::cl::opt<unsigned> maxPatternMatchIterations( - "mlir-max-pattern-match-iterations", - llvm::cl::desc("Max number of iterations scanning for pattern match"), - llvm::cl::init(10)); +/// The max number of iterations scanning for pattern match. +static unsigned maxPatternMatchIterations = 10; namespace { - /// This is a worklist-driven driver for the PatternMatcher, which repeatedly /// applies the locally optimal patterns in a roughly "bottom up" way. 
class GreedyPatternRewriteDriver : public PatternRewriter { diff --git a/mlir/lib/Transforms/ViewOpGraph.cpp b/mlir/lib/Transforms/ViewOpGraph.cpp --- a/mlir/lib/Transforms/ViewOpGraph.cpp +++ b/mlir/lib/Transforms/ViewOpGraph.cpp @@ -14,13 +14,16 @@ #include "mlir/Support/STLExtras.h" #include "llvm/Support/CommandLine.h" -static llvm::cl::opt<int> elideIfLarger( - "print-op-graph-elide-if-larger", - llvm::cl::desc("Upper limit to emit elements attribute rather than elide"), - llvm::cl::init(16)); - using namespace mlir; +/// Return the size limits for eliding large attributes. +static int64_t getLargeAttributeSizeLimit() { + // Use the default from the printer flags if possible. + if (Optional<int64_t> limit = OpPrintingFlags().getLargeElementsAttrLimit()) + return *limit; + return 16; +} + namespace llvm { // Specialize GraphTraits to treat Block as a graph of Operations as nodes and @@ -65,6 +68,8 @@ interleaveComma(op->getResultTypes(), os); os << "\n"; + // A value used to elide large container attribute. + int64_t largeAttrLimit = getLargeAttributeSizeLimit(); for (auto attr : op->getAttrs()) { os << '\n' << attr.first << ": "; // Always emit splat attributes. @@ -75,7 +80,7 @@ // Elide "big" elements attributes. auto elements = attr.second.dyn_cast<ElementsAttr>(); - if (elements && elements.getNumElements() > elideIfLarger) { + if (elements && elements.getNumElements() > largeAttrLimit) { os << std::string(elements.getType().getRank(), '[') << "..." 
<< std::string(elements.getType().getRank(), ']') << " : " << elements.getType(); @@ -83,7 +88,7 @@ } auto array = attr.second.dyn_cast<ArrayAttr>(); - if (array && static_cast<int64_t>(array.size()) > elideIfLarger) { + if (array && static_cast<int64_t>(array.size()) > largeAttrLimit) { os << "[...]"; continue; } diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-skip-non-unit-stride-loops | FileCheck %s +// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 skip-non-unit-stride-loops" | FileCheck %s // Small buffer size to trigger fine copies. -// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-fast-mem-capacity=1 | FileCheck --check-prefix=CHECK-SMALL %s +// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 fast-mem-capacity=1" | FileCheck --check-prefix=CHECK-SMALL %s // Test affine data copy with a memref filter. We use a test pass that invokes // affine data copy utility on the input loop nest. 
diff --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir --- a/mlir/test/Dialect/Affine/dma-generate.mlir +++ b/mlir/test/Dialect/Affine/dma-generate.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-space=2 -affine-data-copy-generate-skip-non-unit-stride-loops -verify-diagnostics | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-capacity=16 -affine-data-copy-generate-fast-mem-space=2 | FileCheck %s --check-prefix FAST-MEM-16KB +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-space=2 skip-non-unit-stride-loops" -verify-diagnostics | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-capacity=16 fast-mem-space=2" | FileCheck %s --check-prefix FAST-MEM-16KB // We run most test cases with -copy-skip-non-unit-stride-loops to allow testing // DMA generation at inner levels easily - since the DMA generation would diff --git a/mlir/test/Dialect/Affine/inlining.mlir b/mlir/test/Dialect/Affine/inlining.mlir --- a/mlir/test/Dialect/Affine/inlining.mlir +++ b/mlir/test/Dialect/Affine/inlining.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -inline -mlir-disable-inline-simplify | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -inline="disable-simplify" | FileCheck %s // Basic test that functions within affine operations are inlined. 
func @func_with_affine_ops(%N: index) { diff --git a/mlir/test/Dialect/Affine/loop-tiling.mlir b/mlir/test/Dialect/Affine/loop-tiling.mlir --- a/mlir/test/Dialect/Affine/loop-tiling.mlir +++ b/mlir/test/Dialect/Affine/loop-tiling.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-size=32 | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-cache-size=512 | FileCheck %s --check-prefix=MODEL -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-size=32 -affine-tile-separate | FileCheck %s --check-prefix=SEPARATE +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32" | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="cache-size=512" | FileCheck %s --check-prefix=MODEL +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32 separate" | FileCheck %s --check-prefix=SEPARATE // ----- diff --git a/mlir/test/Dialect/Affine/unroll-jam.mlir b/mlir/test/Dialect/Affine/unroll-jam.mlir --- a/mlir/test/Dialect/Affine/unroll-jam.mlir +++ b/mlir/test/Dialect/Affine/unroll-jam.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=2 | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=4 | FileCheck --check-prefix=UJAM-FOUR %s +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=2" | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=4" | FileCheck --check-prefix=UJAM-FOUR %s // CHECK-DAG: [[MAP_PLUS_1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)> // CHECK-DAG: [[MAP_DIV_OFFSET:#map[0-9]+]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)> diff --git 
a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir --- a/mlir/test/Dialect/Affine/unroll.mlir +++ b/mlir/test/Dialect/Affine/unroll.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full | FileCheck %s --check-prefix UNROLL-FULL -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full -unroll-full-threshold=2 | FileCheck %s --check-prefix SHORT -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=4 | FileCheck %s --check-prefix UNROLL-BY-4 -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=1 | FileCheck %s --check-prefix UNROLL-BY-1 +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full" | FileCheck %s --check-prefix UNROLL-FULL +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4 +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1 // UNROLL-FULL-DAG: [[MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)> // UNROLL-FULL-DAG: [[MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)> diff --git a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir --- a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline)' -mlir-disable-inline-simplify | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline{disable-simplify})' | FileCheck %s spv.module Logical GLSL450 { spv.func @callee() "None" { diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir --- 
a/mlir/test/Transforms/inlining.mlir +++ b/mlir/test/Transforms/inlining.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify | FileCheck %s -// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC -// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify=false | FileCheck %s --check-prefix INLINE_SIMPLIFY +// RUN: mlir-opt %s -inline="disable-simplify" | FileCheck %s +// RUN: mlir-opt %s -inline="disable-simplify" -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC +// RUN: mlir-opt %s -inline | FileCheck %s --check-prefix INLINE_SIMPLIFY // Inline a function that takes an argument. func @func_with_arg(%c : i32) -> i32 { diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir --- a/mlir/test/Transforms/loop-fusion.mlir +++ b/mlir/test/Transforms/loop-fusion.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -split-input-file | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -fusion-maximal -split-input-file | FileCheck %s --check-prefix=MAXIMAL +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="fusion-maximal" -split-input-file | FileCheck %s --check-prefix=MAXIMAL // TODO(andydavis) Add more tests: // *) Add nested fusion test cases when non-constant loop bound support is diff --git a/mlir/test/lib/Pass/TestPassManager.cpp b/mlir/test/lib/Pass/TestPassManager.cpp --- a/mlir/test/lib/Pass/TestPassManager.cpp +++ b/mlir/test/lib/Pass/TestPassManager.cpp @@ -35,10 +35,9 @@ TestOptionsPass() = default; TestOptionsPass(const TestOptionsPass &) {} TestOptionsPass(const Options &options) { - listOption->assign(options.listOption.begin(), options.listOption.end()); - stringOption.setValue(options.stringOption); - stringListOption->assign(options.stringListOption.begin(), - options.stringListOption.end()); + listOption = options.listOption; + 
stringOption = options.stringOption; + stringListOption = options.stringListOption; } void runOnFunction() final {}