Index: mlir/lib/Dialect/Affine/IR/AffineOps.cpp
===================================================================
--- mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -1816,7 +1816,10 @@
   if (!ivArg || !ivArg.getOwner())
     return AffineForOp();
   auto *containingInst = ivArg.getOwner()->getParent()->getParentOp();
-  return dyn_cast<AffineForOp>(containingInst);
+  if (auto forOp = dyn_cast<AffineForOp>(containingInst))
+    // Check to make sure `val` is the induction variable, not an iter_arg.
+    return forOp.getInductionVar() == val ? forOp : AffineForOp();
+  return AffineForOp();
 }
 
 /// Extracts the induction variables from a list of AffineForOps and returns
Index: mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
===================================================================
--- mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -253,7 +253,8 @@
 ///    transfer read and write operations.
 ///  * Scalar constant operations/operands are converted to vector
 ///    constant operations (splat).
-///  * Uniform operands (only operands defined outside of the loop nest,
+///  * Uniform operands (only induction variables of loops not mapped to
+///    a vector dimension, or operands defined outside of the loop nest
 ///    for now) are broadcasted to a vector.
 ///    TODO: Support more uniform cases.
 ///  * Affine for operations with 'iter_args' are vectorized by
@@ -1062,10 +1063,16 @@
 /// Returns true if the provided value is vector uniform given the vectorization
 /// strategy.
-// TODO: For now, only values that are invariants to all the loops in the
-// vectorization strategy are considered vector uniforms.
+// TODO: For now, only values that are induction variables of loops not in
+// `loopToVectorDim` or invariants to all the loops in the vectorization
+// strategy are considered vector uniforms.
 static bool isUniformDefinition(Value value,
                                 const VectorizationStrategy *strategy) {
+  AffineForOp forOp = getForInductionVarOwner(value);
+  if (forOp &&
+      strategy->loopToVectorDim.find(forOp) == strategy->loopToVectorDim.end())
+    return true;
+
   for (auto loopToDim : strategy->loopToVectorDim) {
     auto loop = cast<AffineForOp>(loopToDim.first);
     if (!loop.isDefinedOutsideOfLoop(value))
@@ -1079,11 +1086,13 @@
 static Operation *vectorizeUniform(Value uniformVal,
                                    VectorizationState &state) {
   OpBuilder::InsertionGuard guard(state.builder);
-  state.builder.setInsertionPointAfterValue(uniformVal);
+  auto uniformScalarRepl =
+      state.valueScalarReplacement.lookupOrDefault(uniformVal);
+  state.builder.setInsertionPointAfterValue(uniformScalarRepl);
 
   auto vectorTy = getVectorType(uniformVal.getType(), state.strategy);
   auto bcastOp = state.builder.create<BroadcastOp>(uniformVal.getLoc(),
-                                                   vectorTy, uniformVal);
+                                                   vectorTy, uniformScalarRepl);
   state.registerValueVectorReplacement(uniformVal, bcastOp);
   return bcastOp;
 }
Index: mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
===================================================================
--- mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
+++ mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
@@ -165,6 +165,56 @@
 
 // -----
 
+// CHECK-LABEL: func @vec_block_arg
+func @vec_block_arg(%A : memref<32x512xi32>) {
+  // CHECK:      affine.for %[[IV0:[arg0-9]+]] = 0 to 512 step 128 {
+  // CHECK-NEXT:   affine.for %[[IV1:[arg0-9]+]] = 0 to 32 {
+  // CHECK-NEXT:     %[[BROADCAST:.*]] = vector.broadcast %[[IV1]] : index to vector<128xindex>
+  // CHECK-NEXT:     %[[CAST:.*]] = index_cast %[[BROADCAST]] : vector<128xindex> to vector<128xi32>
+  // CHECK-NEXT:     vector.transfer_write %[[CAST]], {{.*}}[%[[IV1]], %[[IV0]]] : vector<128xi32>, memref<32x512xi32>
+  affine.for %i = 0 to 512 {  // vectorized
+    affine.for %j = 0 to 32 {
+      %idx = std.index_cast %j : index to i32
+      affine.store %idx, %A[%j, %i] : memref<32x512xi32>
+    }
+  }
+  return
+}
+
+// -----
+
+// CHECK-DAG: #[[$map0:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d0 * 2 + d1 - 1)>
+// CHECK-DAG: #[[$map1:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d2)>
+// CHECK-LABEL: func @vec_block_arg_2
+func @vec_block_arg_2(%A : memref<?x512xindex>) {
+  %c0 = constant 0 : index
+  %N = memref.dim %A, %c0 : memref<?x512xindex>
+  // CHECK:      affine.for %[[IV0:[arg0-9]+]] = 0 to %{{.*}} {
+  // CHECK-NEXT:   %[[BROADCAST1:.*]] = vector.broadcast %[[IV0]] : index to vector<128xindex>
+  // CHECK-NEXT:   affine.for %[[IV1:[arg0-9]+]] = 0 to 512 step 128 {
+  // CHECK-NOT:      vector.broadcast %[[IV1]]
+  // CHECK:          affine.for %[[IV2:[arg0-9]+]] = 0 to 2 {
+  // CHECK-NEXT:       %[[BROADCAST2:.*]] = vector.broadcast %[[IV2]] : index to vector<128xindex>
+  // CHECK-NEXT:       %[[INDEX1:.*]] = affine.apply #[[$map0]](%[[IV0]], %[[IV2]], %[[IV1]])
+  // CHECK-NEXT:       %[[INDEX2:.*]] = affine.apply #[[$map1]](%[[IV0]], %[[IV2]], %[[IV1]])
+  // CHECK:            %[[LOAD:.*]] = vector.transfer_read %{{.*}}[%[[INDEX1]], %[[INDEX2]]], %{{.*}} : memref<?x512xindex>, vector<128xindex>
+  // CHECK-NEXT:       muli %[[BROADCAST1]], %[[LOAD]] : vector<128xindex>
+  // CHECK-NEXT:       addi %{{.*}}, %[[BROADCAST2]] : vector<128xindex>
+  // CHECK:          }
+  affine.for %i0 = 0 to %N {
+    affine.for %i1 = 0 to 512 {  // vectorized
+      affine.for %i2 = 0 to 2 {
+        %0 = affine.load %A[%i0 * 2 + %i2 - 1, %i1] : memref<?x512xindex>
+        %mul = muli %i0, %0 : index
+        %add = addi %mul, %i2 : index
+      }
+    }
+  }
+  return
+}
+
+// -----
+
 // CHECK-LABEL: func @vec_rejected_1
 func @vec_rejected_1(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
   // CHECK-DAG: %[[C0:.*]] = constant 0 : index
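For context, a minimal sketch of the iter_args case that the AffineOps.cpp guard addresses (illustration only, not part of the patch; the function name @iter_arg_example and the shapes are made up). Both %i and %acc are block arguments of the affine.for body, but only %i is the induction variable, so getForInductionVarOwner must return a null AffineForOp for %acc rather than the enclosing loop:

// Hypothetical input, written in the same pre-`arith` std-dialect spelling
// as vectorize_1d.mlir. getForInductionVarOwner(%i) returns the loop;
// getForInductionVarOwner(%acc) now returns AffineForOp() because %acc is
// an iter_arg, not the induction variable.
func @iter_arg_example(%buf : memref<512xf32>) -> f32 {
  %cst = constant 0.0 : f32
  %sum = affine.for %i = 0 to 512 iter_args(%acc = %cst) -> (f32) {
    %v = affine.load %buf[%i] : memref<512xf32>
    %next = addf %acc, %v : f32
    affine.yield %next : f32
  }
  return %sum : f32
}

Without the guard, %acc would be reported as the induction variable of its owning loop, and the new check in isUniformDefinition could then misclassify it as a vector-uniform value.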