This is an archive of the discontinued LLVM Phabricator instance.

[mlir] [VectorOps] Improve lowering of extract_strided_slice (and friends like shape_cast)
ClosedPublic

Authored by aartbik on Aug 6 2020, 3:44 PM.

Download Raw Diff

Details

Reviewers

ftynse
bkramer
nicolasvasilache
ThomasRaoux

Commits

rGc3c95b9c8085: [mlir] [VectorOps] Improve lowering of extract_strided_slice (and friends like…

Summary

Using a shuffle for the last recursive step in progressive lowering not only
results in much more compact IR, but also in more efficient code (since the
backend is no longer confused by subvector aliasing for longer vectors).

E.g. the following

%f = vector.shape_cast %v0: vector<1024xf32> to vector<32x32xf32>

yields much better x86-64 code that runs 3x faster than the original.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

aartbik created this revision. Aug 6 2020, 3:44 PM

Herald added a reviewer: ftynse. · View Herald Transcript · Aug 6 2020, 3:44 PM

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: msifontes, jurahul, Kayjukh and 11 others. · View Herald Transcript

aartbik requested review of this revision. Aug 6 2020, 3:44 PM

Herald added subscribers: stephenneuendorffer, nicolasvasilache. · View Herald Transcript · Aug 6 2020, 3:44 PM

aartbik added reviewers: bkramer, nicolasvasilache, ThomasRaoux. Aug 6 2020, 3:48 PM

Harbormaster completed remote builds in B67388: Diff 283753. Aug 6 2020, 3:52 PM

aartbik retitled this revision from [mlir] [VectorOps] Improve lowering of vector.extract_strided_slice (and friends like shape_cast) to [mlir] [VectorOps] Improve lowering of extract_strided_slice (and friends like shape_cast). Aug 6 2020, 3:53 PM

aartbik edited the summary of this revision. (Show Details)

This revision is now accepted and ready to land. Aug 6 2020, 4:48 PM

trigger tests

Harbormaster completed remote builds in B67397: Diff 283776. Aug 6 2020, 5:34 PM

Great, thanks for fixing this perf bug!

This revision was landed with ongoing or failed builds. Aug 7 2020, 9:21 AM

Closed by commit rGc3c95b9c8085: [mlir] [VectorOps] Improve lowering of extract_strided_slice (and friends like… (authored by aartbik). · Explain Why

This revision was automatically updated to reflect the committed changes.

aartbik added a commit: rGc3c95b9c8085: [mlir] [VectorOps] Improve lowering of extract_strided_slice (and friends like….

Revision Contents

Path

Size

mlir/

lib/

Conversion/

VectorToLLVM/

ConvertVectorToLLVM.cpp

37 lines

test/

Conversion/

VectorToLLVM/

vector-to-llvm.mlir

85 lines

Diff 283776

mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp

Show First 20 Lines • Show All 1,346 Lines • ▼ Show 20 Lines	Operation *getPrintComma(Operation *op) const {
return getPrint(op, typeConverter.getDialect(), "print_comma", {});		return getPrint(op, typeConverter.getDialect(), "print_comma", {});
}		}
Operation *getPrintNewline(Operation *op) const {		Operation *getPrintNewline(Operation *op) const {
return getPrint(op, typeConverter.getDialect(), "print_newline", {});		return getPrint(op, typeConverter.getDialect(), "print_newline", {});
}		}
};		};

/// Progressive lowering of ExtractStridedSliceOp to either:		/// Progressive lowering of ExtractStridedSliceOp to either:
/// 1. extractelement + insertelement for the 1-D case		/// 1. express single offset extract as a direct shuffle.
/// 2. extract + optional strided_slice + insert for the n-D case.		/// 2. extract + lower rank strided_slice + insert for the n-D case.
class VectorStridedSliceOpConversion		class VectorExtractStridedSliceOpConversion
: public OpRewritePattern<ExtractStridedSliceOp> {		: public OpRewritePattern<ExtractStridedSliceOp> {
public:		public:
using OpRewritePattern<ExtractStridedSliceOp>::OpRewritePattern;		using OpRewritePattern<ExtractStridedSliceOp>::OpRewritePattern;

LogicalResult matchAndRewrite(ExtractStridedSliceOp op,		LogicalResult matchAndRewrite(ExtractStridedSliceOp op,
PatternRewriter &rewriter) const override {		PatternRewriter &rewriter) const override {
auto dstType = op.getResult().getType().cast<VectorType>();		auto dstType = op.getResult().getType().cast<VectorType>();

assert(!op.offsets().getValue().empty() && "Unexpected empty offsets");		assert(!op.offsets().getValue().empty() && "Unexpected empty offsets");

int64_t offset =		int64_t offset =
op.offsets().getValue().front().cast<IntegerAttr>().getInt();		op.offsets().getValue().front().cast<IntegerAttr>().getInt();
int64_t size = op.sizes().getValue().front().cast<IntegerAttr>().getInt();		int64_t size = op.sizes().getValue().front().cast<IntegerAttr>().getInt();
int64_t stride =		int64_t stride =
op.strides().getValue().front().cast<IntegerAttr>().getInt();		op.strides().getValue().front().cast<IntegerAttr>().getInt();

auto loc = op.getLoc();		auto loc = op.getLoc();
auto elemType = dstType.getElementType();		auto elemType = dstType.getElementType();
assert(elemType.isSignlessIntOrIndexOrFloat());		assert(elemType.isSignlessIntOrIndexOrFloat());

		// Single offset can be more efficiently shuffled.
		if (op.offsets().getValue().size() == 1) {
		SmallVector<int64_t, 4> offsets;
		offsets.reserve(size);
		for (int64_t off = offset, e = offset + size * stride; off < e;
		off += stride)
		offsets.push_back(off);
		rewriter.replaceOpWithNewOp<ShuffleOp>(op, dstType, op.vector(),
		op.vector(),
		rewriter.getI64ArrayAttr(offsets));
		return success();
		}

		// Extract/insert on a lower ranked extract strided slice op.
Value zero = rewriter.create<ConstantOp>(loc, elemType,		Value zero = rewriter.create<ConstantOp>(loc, elemType,
rewriter.getZeroAttr(elemType));		rewriter.getZeroAttr(elemType));
Value res = rewriter.create<SplatOp>(loc, dstType, zero);		Value res = rewriter.create<SplatOp>(loc, dstType, zero);
for (int64_t off = offset, e = offset + size * stride, idx = 0; off < e;		for (int64_t off = offset, e = offset + size * stride, idx = 0; off < e;
off += stride, ++idx) {		off += stride, ++idx) {
Value extracted = extractOne(rewriter, loc, op.vector(), off);		Value one = extractOne(rewriter, loc, op.vector(), off);
if (op.offsets().getValue().size() > 1) {		Value extracted = rewriter.create<ExtractStridedSliceOp>(
extracted = rewriter.create<ExtractStridedSliceOp>(		loc, one, getI64SubArray(op.offsets(), /* dropFront=*/1),
loc, extracted, getI64SubArray(op.offsets(), /* dropFront=*/1),
getI64SubArray(op.sizes(), /* dropFront=*/1),		getI64SubArray(op.sizes(), /* dropFront=*/1),
getI64SubArray(op.strides(), /* dropFront=*/1));		getI64SubArray(op.strides(), /* dropFront=*/1));
}
res = insertOne(rewriter, loc, extracted, res, idx);		res = insertOne(rewriter, loc, extracted, res, idx);
}		}
rewriter.replaceOp(op, {res});		rewriter.replaceOp(op, res);
return success();		return success();
}		}
/// This pattern creates recursive ExtractStridedSliceOp, but the recursion is		/// This pattern creates recursive ExtractStridedSliceOp, but the recursion is
/// bounded as the rank is strictly decreasing.		/// bounded as the rank is strictly decreasing.
bool hasBoundedRewriteRecursion() const final { return true; }		bool hasBoundedRewriteRecursion() const final { return true; }
};		};

} // namespace		} // namespace

/// Populate the given list with patterns that convert from Vector to LLVM.		/// Populate the given list with patterns that convert from Vector to LLVM.
void mlir::populateVectorToLLVMConversionPatterns(		void mlir::populateVectorToLLVMConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns,		LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
bool reassociateFPReductions) {		bool reassociateFPReductions) {
MLIRContext *ctx = converter.getDialect()->getContext();		MLIRContext *ctx = converter.getDialect()->getContext();
// clang-format off		// clang-format off
patterns.insert<VectorFMAOpNDRewritePattern,		patterns.insert<VectorFMAOpNDRewritePattern,
VectorInsertStridedSliceOpDifferentRankRewritePattern,		VectorInsertStridedSliceOpDifferentRankRewritePattern,
VectorInsertStridedSliceOpSameRankRewritePattern,		VectorInsertStridedSliceOpSameRankRewritePattern,
VectorStridedSliceOpConversion>(ctx);		VectorExtractStridedSliceOpConversion>(ctx);
patterns.insert<VectorReductionOpConversion>(		patterns.insert<VectorReductionOpConversion>(
ctx, converter, reassociateFPReductions);		ctx, converter, reassociateFPReductions);
patterns		patterns
.insert<VectorShuffleOpConversion,		.insert<VectorShuffleOpConversion,
VectorExtractElementOpConversion,		VectorExtractElementOpConversion,
VectorExtractOpConversion,		VectorExtractOpConversion,
VectorFMAOp1DConversion,		VectorFMAOp1DConversion,
VectorInsertElementOpConversion,		VectorInsertElementOpConversion,
▲ Show 20 Lines • Show All 61 Lines • Show Last 20 Lines

mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir

	Show First 20 Lines • Show All 506 Lines • ▼ Show 20 Lines
	// CHECK: llvm.call @print_close() : () -> ()			// CHECK: llvm.call @print_close() : () -> ()
	// CHECK: llvm.call @print_close() : () -> ()			// CHECK: llvm.call @print_close() : () -> ()
	// CHECK: llvm.call @print_newline() : () -> ()			// CHECK: llvm.call @print_newline() : () -> ()

	func @extract_strided_slice1(%arg0: vector<4xf32>) -> vector<2xf32> {			func @extract_strided_slice1(%arg0: vector<4xf32>) -> vector<2xf32> {
	%0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>			%0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
	return %0 : vector<2xf32>			return %0 : vector<2xf32>
	}			}
	// CHECK-LABEL: llvm.func @extract_strided_slice1			// CHECK-LABEL: llvm.func @extract_strided_slice1(
	// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float			// CHECK-SAME: %[[A:.*]]: !llvm.vec<4 x float>)
	// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm.vec<2 x float>			// CHECK: %[[T0:.*]] = llvm.shufflevector %[[A]], %[[A]] [2, 3] : !llvm.vec<4 x float>, !llvm.vec<4 x float>
	// CHECK: llvm.mlir.constant(2 : index) : !llvm.i64			// CHECK: llvm.return %[[T0]] : !llvm.vec<2 x float>
	// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<4 x float>
	// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64
	// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<2 x float>
	// CHECK: llvm.mlir.constant(3 : index) : !llvm.i64
	// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<4 x float>
	// CHECK: llvm.mlir.constant(1 : index) : !llvm.i64
	// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<2 x float>

	func @extract_strided_slice2(%arg0: vector<4x8xf32>) -> vector<2x8xf32> {			func @extract_strided_slice2(%arg0: vector<4x8xf32>) -> vector<2x8xf32> {
	%0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32>			%0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32>
	return %0 : vector<2x8xf32>			return %0 : vector<2x8xf32>
	}			}
	// CHECK-LABEL: llvm.func @extract_strided_slice2			// CHECK-LABEL: llvm.func @extract_strided_slice2(
	// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float			// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vec<8 x float>>)
	// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2x8xf32>) : !llvm.array<2 x vec<8 x float>>			// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vec<8 x float>>
	// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vec<8 x float>>			// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vec<8 x float>>
	// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.array<2 x vec<8 x float>>			// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vec<8 x float>>
	// CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vec<8 x float>>			// CHECK: %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vec<8 x float>>
	// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.array<2 x vec<8 x float>>			// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vec<8 x float>>
				// CHECK: llvm.return %[[T4]] : !llvm.array<2 x vec<8 x float>>

	func @extract_strided_slice3(%arg0: vector<4x8xf32>) -> vector<2x2xf32> {			func @extract_strided_slice3(%arg0: vector<4x8xf32>) -> vector<2x2xf32> {
	%0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32>			%0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32>
	return %0 : vector<2x2xf32>			return %0 : vector<2x2xf32>
	}			}
	// CHECK-LABEL: llvm.func @extract_strided_slice3			// CHECK-LABEL: llvm.func @extract_strided_slice3(
	// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float			// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vec<8 x float>>)
	// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm.array<2 x vec<2 x float>>			// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm.array<2 x vec<2 x float>>
	//			// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vec<8 x float>>
	// Subvector vector<8xf32> @2			// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2, 3] : !llvm.vec<8 x float>, !llvm.vec<8 x float>
	// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vec<8 x float>>			// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<2 x vec<2 x float>>
	// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float			// CHECK: %[[T5:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vec<8 x float>>
	// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm.vec<2 x float>			// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T5]] [2, 3] : !llvm.vec<8 x float>, !llvm.vec<8 x float>
	// CHECK: llvm.mlir.constant(2 : index) : !llvm.i64			// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T4]][1] : !llvm.array<2 x vec<2 x float>>
	// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float>			// CHECK: llvm.return %[[T7]] : !llvm.array<2 x vec<2 x float>>
	// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64
	// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float>
	// CHECK: llvm.mlir.constant(3 : index) : !llvm.i64
	// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float>
	// CHECK: llvm.mlir.constant(1 : index) : !llvm.i64
	// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float>
	// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.array<2 x vec<2 x float>>
	//
	// Subvector vector<8xf32> @3
	// CHECK: llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vec<8 x float>>
	// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float
	// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm.vec<2 x float>
	// CHECK: llvm.mlir.constant(2 : index) : !llvm.i64
	// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float>
	// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64
	// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float>
	// CHECK: llvm.mlir.constant(3 : index) : !llvm.i64
	// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float>
	// CHECK: llvm.mlir.constant(1 : index) : !llvm.i64
	// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float>
	// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.array<2 x vec<2 x float>>

	func @insert_strided_slice1(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vector<4x4x4xf32> {			func @insert_strided_slice1(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vector<4x4x4xf32> {
	%0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32>			%0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32>
	return %0 : vector<4x4x4xf32>			return %0 : vector<4x4x4xf32>
	}			}
	// CHECK-LABEL: llvm.func @insert_strided_slice1			// CHECK-LABEL: llvm.func @insert_strided_slice1
	// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x float>>>			// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x float>>>
	// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x float>>>			// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x float>>>
	▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines
	func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> {			func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> {
	%0 = vector.extract_slices %arg0, [2, 2], [1, 1]			%0 = vector.extract_slices %arg0, [2, 2], [1, 1]
	: vector<3x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>, vector<1x2xf32>, vector<1x1xf32>>			: vector<3x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>, vector<1x2xf32>, vector<1x1xf32>>
	%1 = vector.tuple_get %0, 3 : tuple<vector<2x2xf32>, vector<2x1xf32>, vector<1x2xf32>, vector<1x1xf32>>			%1 = vector.tuple_get %0, 3 : tuple<vector<2x2xf32>, vector<2x1xf32>, vector<1x2xf32>, vector<1x1xf32>>
	return %1 : vector<1x1xf32>			return %1 : vector<1x1xf32>
	}			}
	// CHECK-LABEL: llvm.func @extract_strides(			// CHECK-LABEL: llvm.func @extract_strides(
	// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vec<3 x float>>)			// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vec<3 x float>>)
	// CHECK: %[[s0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vec<1 x float>>			// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vec<1 x float>>
	// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<3 x vec<3 x float>>			// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<3 x vec<3 x float>>
	// CHECK: %[[s3:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1xf32>) : !llvm.vec<1 x float>			// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2] : !llvm.vec<3 x float>, !llvm.vec<3 x float>
	// CHECK: %[[s4:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64			// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<1 x vec<1 x float>>
	// CHECK: %[[s5:.*]] = llvm.extractelement %[[s1]][%[[s4]] : !llvm.i64] : !llvm.vec<3 x float>			// CHECK: llvm.return %[[T4]] : !llvm.array<1 x vec<1 x float>>
	// CHECK: %[[s6:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
	// CHECK: %[[s7:.*]] = llvm.insertelement %[[s5]], %[[s3]][%[[s6]] : !llvm.i64] : !llvm.vec<1 x float>
	// CHECK: %[[s8:.*]] = llvm.insertvalue %[[s7]], %[[s0]][0] : !llvm.array<1 x vec<1 x float>>
	// CHECK: llvm.return %[[s8]] : !llvm.array<1 x vec<1 x float>>

	// CHECK-LABEL: llvm.func @vector_fma(			// CHECK-LABEL: llvm.func @vector_fma(
	// CHECK-SAME: %[[A:.*]]: !llvm.vec<8 x float>, %[[B:.*]]: !llvm.array<2 x vec<4 x float>>)			// CHECK-SAME: %[[A:.*]]: !llvm.vec<8 x float>, %[[B:.*]]: !llvm.array<2 x vec<4 x float>>)
	// CHECK-SAME: -> !llvm.struct<(vec<8 x float>, array<2 x vec<4 x float>>)> {			// CHECK-SAME: -> !llvm.struct<(vec<8 x float>, array<2 x vec<4 x float>>)> {
	func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>) -> (vector<8xf32>, vector<2x4xf32>) {			func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>) -> (vector<8xf32>, vector<2x4xf32>) {
	// CHECK: "llvm.intr.fmuladd"(%[[A]], %[[A]], %[[A]]) :			// CHECK: "llvm.intr.fmuladd"(%[[A]], %[[A]], %[[A]]) :
	// CHECK-SAME: (!llvm.vec<8 x float>, !llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float>			// CHECK-SAME: (!llvm.vec<8 x float>, !llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float>
	%0 = vector.fma %a, %a, %a : vector<8xf32>			%0 = vector.fma %a, %a, %a : vector<8xf32>
	▲ Show 20 Lines • Show All 338 Lines • Show Last 20 Lines