diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
--- a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Func/Transforms/FuncConversions.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Support/LogicalResult.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
@@ -566,6 +567,41 @@
   }
 };
 
+//===----------------------------------------------------------------------===//
+// ConvertMaxMin
+//===----------------------------------------------------------------------===//
+
+/// Rewrites a wide `arith.max*i`/`arith.min*i` op (`SourceOp`) into an
+/// `arith.cmpi` with predicate `CmpPred` followed by an `arith.select` over
+/// the original operands. Fails to match when the type converter does not map
+/// the result type to a vector type.
+template <typename SourceOp, arith::CmpIPredicate CmpPred>
+struct ConvertMaxMin final : OpConversionPattern<SourceOp> {
+  using OpConversionPattern<SourceOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    Location loc = op->getLoc();
+
+    Type oldTy = op.getType();
+    auto newTy = this->getTypeConverter()
+                     ->convertType(oldTy)
+                     .template dyn_cast_or_null<VectorType>();
+    if (!newTy)
+      return rewriter.notifyMatchFailure(
+          loc, llvm::formatv("unsupported type: {0}", op.getType()));
+
+    // Rewrite Max*I/Min*I as compare and select over original operands. Let
+    // the CmpI and Select emulation patterns handle the final legalization.
+    Value cmp =
+        rewriter.create<arith::CmpIOp>(loc, CmpPred, op.getLhs(), op.getRhs());
+    rewriter.replaceOpWithNewOp<arith::SelectOp>(op, cmp, op.getLhs(),
+                                                 op.getRhs());
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // ConvertSelect
 //===----------------------------------------------------------------------===//
@@ -1005,6 +1041,10 @@
       ConvertConstant, ConvertCmpI, ConvertSelect, ConvertVectorPrint,
       // Binary ops.
       ConvertAddI, ConvertMulI, ConvertShLI, ConvertShRSI, ConvertShRUI,
+      ConvertMaxMin<arith::MaxUIOp, arith::CmpIPredicate::ugt>,
+      ConvertMaxMin<arith::MaxSIOp, arith::CmpIPredicate::sgt>,
+      ConvertMaxMin<arith::MinUIOp, arith::CmpIPredicate::ult>,
+      ConvertMaxMin<arith::MinSIOp, arith::CmpIPredicate::slt>,
       // Bitwise binary ops.
       ConvertBitwiseBinary<arith::AndIOp>, ConvertBitwiseBinary<arith::OrIOp>,
       ConvertBitwiseBinary<arith::XOrIOp>,
diff --git a/mlir/test/Dialect/Arith/emulate-wide-int.mlir b/mlir/test/Dialect/Arith/emulate-wide-int.mlir
--- a/mlir/test/Dialect/Arith/emulate-wide-int.mlir
+++ b/mlir/test/Dialect/Arith/emulate-wide-int.mlir
@@ -395,6 +395,134 @@
   return %b : vector<3xi16>
 }
 
+// CHECK-LABEL: func @maxui_scalar
+// CHECK-SAME: ([[ARG0:%.+]]: vector<2xi32>, [[ARG1:%.+]]: vector<2xi32>) -> vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][1] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][1] : vector<2xi32>
+// CHECK: arith.cmpi ugt
+// CHECK: arith.cmpi ugt
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: [[INS0:%.+]] = vector.insert {{%.+}}, {{%.+}} [0] : i32 into vector<2xi32>
+// CHECK-NEXT: [[INS1:%.+]] = vector.insert {{%.+}}, [[INS0]] [1] : i32 into vector<2xi32>
+// CHECK-NEXT: return [[INS1]] : vector<2xi32>
+func.func @maxui_scalar(%a : i64, %b : i64) -> i64 {
+  %x = arith.maxui %a, %b : i64
+  return %x : i64
+}
+
+// CHECK-LABEL: func @maxui_vector
+// CHECK-SAME: ([[ARG0:%.+]]: vector<3x2xi32>, [[ARG1:%.+]]: vector<3x2xi32>) -> vector<3x2xi32>
+// CHECK: arith.cmpi ugt
+// CHECK: arith.cmpi ugt
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: return {{.+}} : vector<3x2xi32>
+func.func @maxui_vector(%a : vector<3xi64>, %b : vector<3xi64>) -> vector<3xi64> {
+  %x = arith.maxui %a, %b : vector<3xi64>
+  return %x : vector<3xi64>
+}
+
+// CHECK-LABEL: func @maxsi_scalar
+// CHECK-SAME: ([[ARG0:%.+]]: vector<2xi32>, [[ARG1:%.+]]: vector<2xi32>) -> vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][1] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][1] : vector<2xi32>
+// CHECK: arith.cmpi ugt
+// CHECK: arith.cmpi sgt
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: [[INS0:%.+]] = vector.insert {{%.+}}, {{%.+}} [0] : i32 into vector<2xi32>
+// CHECK-NEXT: [[INS1:%.+]] = vector.insert {{%.+}}, [[INS0]] [1] : i32 into vector<2xi32>
+// CHECK-NEXT: return [[INS1]] : vector<2xi32>
+func.func @maxsi_scalar(%a : i64, %b : i64) -> i64 {
+  %x = arith.maxsi %a, %b : i64
+  return %x : i64
+}
+
+// CHECK-LABEL: func @maxsi_vector
+// CHECK-SAME: ([[ARG0:%.+]]: vector<3x2xi32>, [[ARG1:%.+]]: vector<3x2xi32>) -> vector<3x2xi32>
+// CHECK: arith.cmpi ugt
+// CHECK: arith.cmpi sgt
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: return {{.+}} : vector<3x2xi32>
+func.func @maxsi_vector(%a : vector<3xi64>, %b : vector<3xi64>) -> vector<3xi64> {
+  %x = arith.maxsi %a, %b : vector<3xi64>
+  return %x : vector<3xi64>
+}
+
+// CHECK-LABEL: func @minui_scalar
+// CHECK-SAME: ([[ARG0:%.+]]: vector<2xi32>, [[ARG1:%.+]]: vector<2xi32>) -> vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][1] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][1] : vector<2xi32>
+// CHECK: arith.cmpi ult
+// CHECK: arith.cmpi ult
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: [[INS0:%.+]] = vector.insert {{%.+}}, {{%.+}} [0] : i32 into vector<2xi32>
+// CHECK-NEXT: [[INS1:%.+]] = vector.insert {{%.+}}, [[INS0]] [1] : i32 into vector<2xi32>
+// CHECK-NEXT: return [[INS1]] : vector<2xi32>
+func.func @minui_scalar(%a : i64, %b : i64) -> i64 {
+  %x = arith.minui %a, %b : i64
+  return %x : i64
+}
+
+// CHECK-LABEL: func @minui_vector
+// CHECK-SAME: ([[ARG0:%.+]]: vector<3x2xi32>, [[ARG1:%.+]]: vector<3x2xi32>) -> vector<3x2xi32>
+// CHECK: arith.cmpi ult
+// CHECK: arith.cmpi ult
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: return {{.+}} : vector<3x2xi32>
+func.func @minui_vector(%a : vector<3xi64>, %b : vector<3xi64>) -> vector<3xi64> {
+  %x = arith.minui %a, %b : vector<3xi64>
+  return %x : vector<3xi64>
+}
+
+// CHECK-LABEL: func @minsi_scalar
+// CHECK-SAME: ([[ARG0:%.+]]: vector<2xi32>, [[ARG1:%.+]]: vector<2xi32>) -> vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG0]][1] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][0] : vector<2xi32>
+// CHECK-NEXT: vector.extract [[ARG1]][1] : vector<2xi32>
+// CHECK: arith.cmpi ult
+// CHECK: arith.cmpi slt
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: [[INS0:%.+]] = vector.insert {{%.+}}, {{%.+}} [0] : i32 into vector<2xi32>
+// CHECK-NEXT: [[INS1:%.+]] = vector.insert {{%.+}}, [[INS0]] [1] : i32 into vector<2xi32>
+// CHECK-NEXT: return [[INS1]] : vector<2xi32>
+func.func @minsi_scalar(%a : i64, %b : i64) -> i64 {
+  %x = arith.minsi %a, %b : i64
+  return %x : i64
+}
+
+// CHECK-LABEL: func @minsi_vector
+// CHECK-SAME: ([[ARG0:%.+]]: vector<3x2xi32>, [[ARG1:%.+]]: vector<3x2xi32>) -> vector<3x2xi32>
+// CHECK: arith.cmpi ult
+// CHECK: arith.cmpi slt
+// CHECK: arith.cmpi eq
+// CHECK: arith.select
+// CHECK: arith.select
+// CHECK: return {{.+}} : vector<3x2xi32>
+func.func @minsi_vector(%a : vector<3xi64>, %b : vector<3xi64>) -> vector<3xi64> {
+  %x = arith.minsi %a, %b : vector<3xi64>
+  return %x : vector<3xi64>
+}
+
 // CHECK-LABEL: func.func @select_scalar
 // CHECK-SAME: ([[ARG0:%.+]]: vector<2xi32>, [[ARG1:%.+]]: vector<2xi32>, [[ARG2:%.+]]: i1)
 // CHECK-SAME: -> vector<2xi32>
diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-max-min-i16.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-max-min-i16.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-max-min-i16.mlir
@@ -0,0 +1,157 @@
+// Check that the wide integer `arith.max*i`/`min*i` emulation produces the
+// same result as wide ops. Emulate i16 ops with i8 ops.
+// Ops in functions prefixed with `emulate` will be emulated using i8 types.
+
+// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: --shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s --match-full-lines
+
+// RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=8" \
+// RUN: --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: --shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s --match-full-lines
+
+func.func @emulate_maxui(%lhs : i16, %rhs : i16) -> (i16) {
+  %res = arith.maxui %lhs, %rhs : i16
+  return %res : i16
+}
+
+func.func @check_maxui(%lhs : i16, %rhs : i16) -> () {
+  %res = func.call @emulate_maxui(%lhs, %rhs) : (i16, i16) -> (i16)
+  vector.print %res : i16
+  return
+}
+
+func.func @emulate_maxsi(%lhs : i16, %rhs : i16) -> (i16) {
+  %res = arith.maxsi %lhs, %rhs : i16
+  return %res : i16
+}
+
+func.func @check_maxsi(%lhs : i16, %rhs : i16) -> () {
+  %res = func.call @emulate_maxsi(%lhs, %rhs) : (i16, i16) -> (i16)
+  vector.print %res : i16
+  return
+}
+
+func.func @emulate_minui(%lhs : i16, %rhs : i16) -> (i16) {
+  %res = arith.minui %lhs, %rhs : i16
+  return %res : i16
+}
+
+func.func @check_minui(%lhs : i16, %rhs : i16) -> () {
+  %res = func.call @emulate_minui(%lhs, %rhs) : (i16, i16) -> (i16)
+  vector.print %res : i16
+  return
+}
+
+func.func @emulate_minsi(%lhs : i16, %rhs : i16) -> (i16) {
+  %res = arith.minsi %lhs, %rhs : i16
+  return %res : i16
+}
+
+func.func @check_minsi(%lhs : i16, %rhs : i16) -> () {
+  %res = func.call @emulate_minsi(%lhs, %rhs) : (i16, i16) -> (i16)
+  vector.print %res : i16
+  return
+}
+
+
+func.func @entry() {
+  %cst0 = arith.constant 0 : i16
+  %cst1 = arith.constant 1 : i16
+  %cst7 = arith.constant 7 : i16
+  %cst_n1 = arith.constant -1 : i16
+  %cst1337 = arith.constant 1337 : i16
+  %cst4096 = arith.constant 4096 : i16
+  %cst_i16_min = arith.constant -32768 : i16
+
+  // CHECK: 0
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: -1
+  // CHECK-NEXT: -1
+  // CHECK-NEXT: -1
+  // CHECK-NEXT: 1337
+  // CHECK-NEXT: 4096
+  // CHECK-NEXT: -32768
+  func.call @check_maxui(%cst0, %cst0) : (i16, i16) -> ()
+  func.call @check_maxui(%cst0, %cst1) : (i16, i16) -> ()
+  func.call @check_maxui(%cst1, %cst0) : (i16, i16) -> ()
+  func.call @check_maxui(%cst1, %cst1) : (i16, i16) -> ()
+  func.call @check_maxui(%cst_n1, %cst1) : (i16, i16) -> ()
+  func.call @check_maxui(%cst1, %cst_n1) : (i16, i16) -> ()
+  func.call @check_maxui(%cst_n1, %cst1337) : (i16, i16) -> ()
+  func.call @check_maxui(%cst1337, %cst1337) : (i16, i16) -> ()
+  func.call @check_maxui(%cst4096, %cst4096) : (i16, i16) -> ()
+  func.call @check_maxui(%cst1337, %cst_i16_min) : (i16, i16) -> ()
+
+  // CHECK-NEXT: 0
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1337
+  // CHECK-NEXT: 1337
+  // CHECK-NEXT: 4096
+  // CHECK-NEXT: 1337
+  func.call @check_maxsi(%cst0, %cst0) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst0, %cst1) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst1, %cst0) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst1, %cst1) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst_n1, %cst1) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst1, %cst_n1) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst_n1, %cst1337) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst1337, %cst1337) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst4096, %cst4096) : (i16, i16) -> ()
+  func.call @check_maxsi(%cst1337, %cst_i16_min) : (i16, i16) -> ()
+
+  // CHECK-NEXT: 0
+  // CHECK-NEXT: 0
+  // CHECK-NEXT: 0
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: 1337
+  // CHECK-NEXT: 1337
+  // CHECK-NEXT: 4096
+  // CHECK-NEXT: 1337
+  func.call @check_minui(%cst0, %cst0) : (i16, i16) -> ()
+  func.call @check_minui(%cst0, %cst1) : (i16, i16) -> ()
+  func.call @check_minui(%cst1, %cst0) : (i16, i16) -> ()
+  func.call @check_minui(%cst1, %cst1) : (i16, i16) -> ()
+  func.call @check_minui(%cst_n1, %cst1) : (i16, i16) -> ()
+  func.call @check_minui(%cst1, %cst_n1) : (i16, i16) -> ()
+  func.call @check_minui(%cst_n1, %cst1337) : (i16, i16) -> ()
+  func.call @check_minui(%cst1337, %cst1337) : (i16, i16) -> ()
+  func.call @check_minui(%cst4096, %cst4096) : (i16, i16) -> ()
+  func.call @check_minui(%cst1337, %cst_i16_min) : (i16, i16) -> ()
+
+  // CHECK-NEXT: 0
+  // CHECK-NEXT: 0
+  // CHECK-NEXT: 0
+  // CHECK-NEXT: 1
+  // CHECK-NEXT: -1
+  // CHECK-NEXT: -1
+  // CHECK-NEXT: -1
+  // CHECK-NEXT: 1337
+  // CHECK-NEXT: 4096
+  // CHECK-NEXT: -32768
+  func.call @check_minsi(%cst0, %cst0) : (i16, i16) -> ()
+  func.call @check_minsi(%cst0, %cst1) : (i16, i16) -> ()
+  func.call @check_minsi(%cst1, %cst0) : (i16, i16) -> ()
+  func.call @check_minsi(%cst1, %cst1) : (i16, i16) -> ()
+  func.call @check_minsi(%cst_n1, %cst1) : (i16, i16) -> ()
+  func.call @check_minsi(%cst1, %cst_n1) : (i16, i16) -> ()
+  func.call @check_minsi(%cst_n1, %cst1337) : (i16, i16) -> ()
+  func.call @check_minsi(%cst1337, %cst1337) : (i16, i16) -> ()
+  func.call @check_minsi(%cst4096, %cst4096) : (i16, i16) -> ()
+  func.call @check_minsi(%cst1337, %cst_i16_min) : (i16, i16) -> ()
+
+  return
+}