Index: mlir/lib/Analysis/AffineAnalysis.cpp =================================================================== --- mlir/lib/Analysis/AffineAnalysis.cpp +++ mlir/lib/Analysis/AffineAnalysis.cpp @@ -58,6 +58,12 @@ .Case([](arith::MulFOp) { return AtomicRMWKind::mulf; }) .Case([](arith::AddIOp) { return AtomicRMWKind::addi; }) .Case([](arith::MulIOp) { return AtomicRMWKind::muli; }) + .Case([](MinFOp) { return AtomicRMWKind::minf; }) + .Case([](MaxFOp) { return AtomicRMWKind::maxf; }) + .Case([](MinSIOp) { return AtomicRMWKind::mins; }) + .Case([](MaxSIOp) { return AtomicRMWKind::maxs; }) + .Case([](MinUIOp) { return AtomicRMWKind::minu; }) + .Case([](MaxUIOp) { return AtomicRMWKind::maxu; }) .Default([](Operation *) -> Optional<AtomicRMWKind> { // TODO: AtomicRMW supports other kinds of reductions this is // currently not detecting, add those when the need arises. Index: mlir/lib/Dialect/StandardOps/IR/Ops.cpp =================================================================== --- mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -251,35 +251,17 @@ case AtomicRMWKind::muli: return builder.create<arith::MulIOp>(loc, lhs, rhs); case AtomicRMWKind::maxf: - return builder.create<SelectOp>( - loc, - builder.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT, lhs, rhs), - lhs, rhs); + return builder.create<MaxFOp>(loc, lhs, rhs); case AtomicRMWKind::minf: - return builder.create<SelectOp>( - loc, - builder.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLT, lhs, rhs), - lhs, rhs); + return builder.create<MinFOp>(loc, lhs, rhs); case AtomicRMWKind::maxs: - return builder.create<SelectOp>( - loc, - builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt, lhs, rhs), - lhs, rhs); + return builder.create<MaxSIOp>(loc, lhs, rhs); case AtomicRMWKind::mins: - return builder.create<SelectOp>( - loc, - builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, lhs, rhs), - lhs, rhs); + return builder.create<MinSIOp>(loc, lhs, rhs); case AtomicRMWKind::maxu: - return builder.create<SelectOp>( - loc, - builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ugt, lhs, rhs), - lhs, rhs); + return builder.create<MaxUIOp>(loc, lhs, 
rhs); case AtomicRMWKind::minu: - return builder.create<SelectOp>( - loc, - builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ult, lhs, rhs), - lhs, rhs); + return builder.create<MinUIOp>(loc, lhs, rhs); // TODO: Add remaining reduction operations. default: (void)emitOptionalError(loc, "Reduction operation type not supported"); Index: mlir/lib/Dialect/Vector/VectorOps.cpp =================================================================== --- mlir/lib/Dialect/Vector/VectorOps.cpp +++ mlir/lib/Dialect/Vector/VectorOps.cpp @@ -371,16 +371,28 @@ builder.getStringAttr("mul"), vector, ValueRange{}); case AtomicRMWKind::minf: + return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType, + builder.getStringAttr("minf"), + vector, ValueRange{}); case AtomicRMWKind::mins: + return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType, + builder.getStringAttr("minsi"), + vector, ValueRange{}); case AtomicRMWKind::minu: return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType, - builder.getStringAttr("min"), + builder.getStringAttr("minui"), vector, ValueRange{}); case AtomicRMWKind::maxf: + return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType, + builder.getStringAttr("maxf"), + vector, ValueRange{}); case AtomicRMWKind::maxs: + return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType, + builder.getStringAttr("maxsi"), + vector, ValueRange{}); case AtomicRMWKind::maxu: return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType, - builder.getStringAttr("max"), + builder.getStringAttr("maxui"), vector, ValueRange{}); // TODO: Add remaining reduction operations. 
default: Index: mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir =================================================================== --- mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir +++ mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir @@ -29,6 +29,37 @@ // ----- +func @vecdim_reduction_minf(%in: memref<256x512xf32>, %out: memref<256xf32>) { + %cst = arith.constant 0.000000e+00 : f32 + affine.for %i = 0 to 256 { + %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) { + %ld = affine.load %in[%i, %j] : memref<256x512xf32> + %min = minf %red_iter, %ld : f32 + affine.yield %min : f32 + } + affine.store %final_red, %out[%i] : memref<256xf32> + } + return +} + +// CHECK-LABEL: @vecdim_reduction_minf +// CHECK: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: affine.for %{{.*}} = 0 to 256 { +// CHECK: %[[vmax:.*]] = arith.constant dense<0x7F800000> : vector<128xf32> +// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmax]]) -> (vector<128xf32>) { +// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32> +// CHECK: %[[min:.*]] = minf %[[red_iter]], %[[ld]] : vector<128xf32> +// CHECK: affine.yield %[[min]] : vector<128xf32> +// CHECK: } +// CHECK: %[[nonfinal_min:.*]] = vector.reduction "minf", %[[vred:.*]] : vector<128xf32> into f32 +// Note that to compute the final result we need to get the minimum of the original initial value +// (%cst) and %nonfinal_min since the original initial value is 0, not the max value. +// CHECK: %[[final_min:.*]] = minf %[[nonfinal_min]], %[[cst]] : f32 +// CHECK: affine.store %[[final_min]], %{{.*}} : memref<256xf32> +// CHECK: } + +// ----- + // The inner reduction loop '%j' is vectorized. (The order of addf's operands is // different than in the previous test case).