diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir @@ -96,23 +96,16 @@ // Verify memory is zeroed by doing an add reduction with initial value of // zero. - %init_0 = arith.constant 0 : i64 - %add_reduce = scf.for %vnum = %c0 to %svl_b step %c1_index iter_args(%iter = %init_0) -> (i64) { + %init_0 = arith.constant 0 : i8 + %add_reduce = scf.for %vnum = %c0 to %svl_b step %c1_index iter_args(%iter = %init_0) -> (i8) { %row = vector.load %za_b[%vnum, %c0] : memref, vector<[16]xi8> - - %inner_add_reduce = scf.for %offset = %c0 to %svl_b step %c1_index iter_args(%inner_iter = %init_0) -> (i64) { - %t = vector.extractelement %row[%offset : index] : vector<[16]xi8> - %t_i64 = arith.extui %t : i8 to i64 - %inner_add_reduce_next = arith.addi %inner_iter, %t_i64 : i64 - scf.yield %inner_add_reduce_next : i64 - } - - %add_reduce_next = arith.addi %iter, %inner_add_reduce : i64 - scf.yield %add_reduce_next : i64 + %row_sum = vector.reduction , %row : vector<[16]xi8> into i8 + %add_reduce_next = arith.addi %iter, %row_sum : i8 + scf.yield %add_reduce_next : i8 } // CHECK-NEXT: 0 - vector.print %add_reduce : i64 + vector.print %add_reduce : i8 // Verify the add reduction works as expected. // @@ -121,23 +114,16 @@ // function. memref.store %c4, %za_b[%c3, %c7] : memref memref.store %c15, %za_b[%c7, %c3] : memref - %add_reduce2 = scf.for %vnum = %c0 to %svl_b step %c1_index iter_args(%iter = %init_0) -> (i64) { + %add_reduce2 = scf.for %vnum = %c0 to %svl_b step %c1_index iter_args(%iter = %init_0) -> (i8) { %row = vector.load %za_b[%vnum, %c0] : memref, vector<[16]xi8> - - %inner_add_reduce = scf.for %offset = %c0 to %svl_b step %c1_index iter_args(%inner_iter = %init_0) -> (i64) { - %t = vector.extractelement %row[%offset : index] : vector<[16]xi8> - %t_i64 = arith.extui %t : i8 to i64 - %inner_add_reduce_next = arith.addi %inner_iter, %t_i64 : i64 - scf.yield %inner_add_reduce_next : i64 - } - - %add_reduce_next = arith.addi %iter, %inner_add_reduce : i64 - scf.yield %add_reduce_next : i64 + %row_sum = vector.reduction , %row : vector<[16]xi8> into i8 + %add_reduce_next = arith.addi %iter, %row_sum : i8 + scf.yield %add_reduce_next : i8 } // 15+4=19 // CHECK-NEXT: 19 - vector.print %add_reduce2 : i64 + vector.print %add_reduce2 : i8 %c0_i32 = arith.constant 0 : i32 return %c0_i32 : i32