diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -654,6 +654,7 @@ let parser = [{ return parseReturnOp(parser, result); }]; let printer = [{ printReturnOp(p, *this); }]; + let verifier = [{ return ::verify(*this); }]; } def LLVM_ResumeOp : LLVM_TerminatorOp<"resume", []> { let arguments = (ins LLVM_Type:$value); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1116,10 +1116,10 @@ } //===----------------------------------------------------------------------===// -// Printing/parsing for LLVM::ReturnOp. +// Printing, parsing and verification for LLVM::ReturnOp. //===----------------------------------------------------------------------===// -static void printReturnOp(OpAsmPrinter &p, ReturnOp &op) { +static void printReturnOp(OpAsmPrinter &p, ReturnOp op) { p << op.getOperationName(); p.printOptionalAttrDict(op.getAttrs()); assert(op.getNumOperands() <= 1); @@ -1148,6 +1148,35 @@ return success(); } +static LogicalResult verify(ReturnOp op) { + if (op->getNumOperands() > 1) + return op->emitOpError("expected at most 1 operand"); + + if (auto parent = op->getParentOfType()) { + Type expectedType = parent.getType().getReturnType(); + if (expectedType.isa()) { + if (op->getNumOperands() == 0) + return success(); + InFlightDiagnostic diag = op->emitOpError("expected no operands"); + diag.attachNote(parent->getLoc()) << "when returning from function"; + return diag; + } + if (op->getNumOperands() == 0) { + if (expectedType.isa()) + return success(); + InFlightDiagnostic diag = op->emitOpError("expected 1 operand"); + diag.attachNote(parent->getLoc()) << "when returning from function"; + return diag; + } + if (expectedType != op->getOperand(0).getType()) { + InFlightDiagnostic diag = op->emitOpError("mismatching result types"); + diag.attachNote(parent->getLoc()) << "when returning from function"; + return diag; + } + } + return success(); +} + //===----------------------------------------------------------------------===// // Verifier for LLVM::AddressOfOp. //===----------------------------------------------------------------------===// @@ -1528,6 +1557,20 @@ return success(); } +static bool isZeroAttribute(Attribute value) { + if (auto intValue = value.dyn_cast()) + return intValue.getValue().isNullValue(); + if (auto fpValue = value.dyn_cast()) + return fpValue.getValue().isZero(); + if (auto splatValue = value.dyn_cast()) + return isZeroAttribute(splatValue.getSplatValue()); + if (auto elementsValue = value.dyn_cast()) + return llvm::all_of(elementsValue.getValues(), isZeroAttribute); + if (auto arrayValue = value.dyn_cast()) + return llvm::all_of(arrayValue.getValue(), isZeroAttribute); + return false; +} + static LogicalResult verify(GlobalOp op) { if (!LLVMPointerType::isValidElementType(op.getType())) return op.emitOpError( @@ -1558,6 +1601,25 @@ if (op.getValueOrNull()) return op.emitOpError("cannot have both initializer value and region"); } + + if (op.linkage() == Linkage::Common) { + if (Attribute value = op.getValueOrNull()) { + if (!isZeroAttribute(value)) { + return op.emitOpError() + << "expected zero value for '" + << stringifyLinkage(Linkage::Common) << "' linkage"; + } + } + } + + if (op.linkage() == Linkage::Appending) { + if (!op.getType().isa()) { + return op.emitOpError() + << "expected array type for '" + << stringifyLinkage(Linkage::Appending) << "' linkage"; + } + } + return success(); } @@ -1840,8 +1902,17 @@ //===----------------------------------------------------------------------===// static LogicalResult verify(LLVM::ConstantOp op) { - if (!(op.value().isa() || op.value().isa() || - op.value().isa() || op.value().isa())) + if (StringAttr sAttr = op.value().dyn_cast()) { + auto arrayType = op.getType().dyn_cast(); + if (!arrayType || arrayType.getNumElements() != sAttr.getValue().size() || + !arrayType.getElementType().isInteger(8)) { + return op->emitOpError() + << "expected array type of " << sAttr.getValue().size() + << " i8 elements for the string constant"; + } + return success(); + } + if (!op.value().isa()) return op.emitOpError() << "only supports integer, float, string or elements attributes"; return success(); @@ -1964,6 +2035,14 @@ intBitWidth != 64) return op.emitOpError("expected LLVM IR integer type"); } + + if (static_cast(op.ordering()) < + static_cast(AtomicOrdering::monotonic)) + return op.emitOpError() + << "expected at least '" + << stringifyAtomicOrdering(AtomicOrdering::monotonic) + << "' ordering"; + return success(); } diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir --- a/mlir/test/Dialect/LLVMIR/global.mlir +++ b/mlir/test/Dialect/LLVMIR/global.mlir @@ -44,7 +44,7 @@ // CHECK: llvm.mlir.global common llvm.mlir.global common @common() : i64 // CHECK: llvm.mlir.global appending -llvm.mlir.global appending @appending() : i64 +llvm.mlir.global appending @appending() : !llvm.array<2 x i64> // CHECK: llvm.mlir.global extern_weak llvm.mlir.global extern_weak @extern_weak() : i64 // CHECK: llvm.mlir.global linkonce_odr diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -222,6 +222,30 @@ // ----- +llvm.func @void_func_result(%arg0: i32) { + // expected-error@below {{expected no operands}} + // expected-note@above {{when returning from function}} + llvm.return %arg0: i32 +} + +// ----- + +llvm.func @non_void_func_no_result() -> i32 { + // expected-error@below {{expected 1 operand}} + // expected-note@above {{when returning from function}} + llvm.return +} + +// ----- + +llvm.func @func_result_mismatch(%arg0: f32) -> i32 { + // expected-error@below {{mismatching result types}} + // expected-note@above {{when returning from function}} + llvm.return %arg0 : f32 +} + +// ----- + func @constant_wrong_type() { // expected-error@+1 {{only supports integer, float, string or elements attributes}} llvm.mlir.constant(@constant_wrong_type) : !llvm.ptr> @@ -229,6 +253,13 @@ // ----- +func @constant_wrong_type_string() { + // expected-error@below {{expected array type of 3 i8 elements for the string constant}} + llvm.mlir.constant("foo") : !llvm.ptr +} + +// ----- + func @insertvalue_non_llvm_type(%a : i32, %b : i32) { // expected-error@+1 {{expected LLVM IR Dialect type}} llvm.insertvalue %a, %b[0] : tensor<*xi32> @@ -561,7 +592,7 @@ llvm.func @foo(i32) -> i32 llvm.func @__gxx_personality_v0(...) -> i32 -llvm.func @bad_landingpad(%arg0: !llvm.ptr>) attributes { personality = @__gxx_personality_v0} { +llvm.func @bad_landingpad(%arg0: !llvm.ptr>) -> i32 attributes { personality = @__gxx_personality_v0} { %0 = llvm.mlir.constant(3 : i32) : i32 %1 = llvm.mlir.constant(2 : i32) : i32 %2 = llvm.invoke @foo(%1) to ^bb1 unwind ^bb2 : (i32) -> i32 @@ -667,3 +698,18 @@ ^bb2(%1: i32, %2: i32): // pred: ^bb0 llvm.return } + +// ----- + +// expected-error@below {{expected zero value for 'common' linkage}} +llvm.mlir.global common @non_zero_global_common_linkage(42 : i32) : i32 + +// ----- + +// expected-error@below {{expected zero value for 'common' linkage}} +llvm.mlir.global common @non_zero_compound_global_common_linkage(dense<[0, 0, 0, 1, 0]> : vector<5xi32>) : !llvm.array<5 x i32> + +// ----- + +// expected-error@below {{expected array type for 'appending' linkage}} +llvm.mlir.global appending @non_array_type_global_appending_linkage() : i32 diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -284,8 +284,8 @@ // CHECK-LABEL: @atomicrmw func @atomicrmw(%ptr : !llvm.ptr, %val : f32) { - // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} unordered : f32 - %0 = llvm.atomicrmw fadd %ptr, %val unordered : f32 + // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} monotonic : f32 + %0 = llvm.atomicrmw fadd %ptr, %val monotonic : f32 llvm.return } diff --git a/mlir/test/Target/avx512.mlir b/mlir/test/Target/avx512.mlir --- a/mlir/test/Target/avx512.mlir +++ b/mlir/test/Target/avx512.mlir @@ -2,10 +2,10 @@ // CHECK-LABEL: define <16 x float> @LLVM_x86_avx512_mask_ps_512 llvm.func @LLVM_x86_avx512_mask_ps_512(%a: vector<16 x f32>, - %b: i32, %c: i16) -> (vector<16 x f32>) { + %b = llvm.mlir.constant(42 : i32) : i32 // CHECK: call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %0 = "llvm_avx512.mask.rndscale.ps.512"(%a, %b, %a, %c, %b) : (vector<16 x f32>, i32, vector<16 x f32>, i16, i32) -> vector<16 x f32> @@ -17,10 +17,10 @@ // CHECK-LABEL: define <8 x double> @LLVM_x86_avx512_mask_pd_512 llvm.func @LLVM_x86_avx512_mask_pd_512(%a: vector<8xf64>, - %b: i32, %c: i8) -> (vector<8xf64>) { + %b = llvm.mlir.constant(42 : i32) : i32 // CHECK: call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %0 = "llvm_avx512.mask.rndscale.pd.512"(%a, %b, %a, %c, %b) : (vector<8xf64>, i32, vector<8xf64>, i8, i32) -> vector<8xf64> @@ -30,22 +30,22 @@ llvm.return %1: vector<8xf64> } -// CHECK-LABEL: define <{ i16, i16 }> @LLVM_x86_vp2intersect_d_512 +// CHECK-LABEL: define { <16 x i1>, <16 x i1> } @LLVM_x86_vp2intersect_d_512 llvm.func @LLVM_x86_vp2intersect_d_512(%a: vector<16xi32>, %b: vector<16xi32>) - -> !llvm.struct + -> !llvm.struct<(vector<16 x i1>, vector<16 x i1>)> { // CHECK: call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %0 = "llvm_avx512.vp2intersect.d.512"(%a, %b) : - (vector<16xi32>, vector<16xi32>) -> !llvm.struct - llvm.return %0 : !llvm.struct + (vector<16xi32>, vector<16xi32>) -> !llvm.struct<(vector<16 x i1>, vector<16 x i1>)> + llvm.return %0 : !llvm.struct<(vector<16 x i1>, vector<16 x i1>)> } -// CHECK-LABEL: define <{ i8, i8 }> @LLVM_x86_vp2intersect_q_512 +// CHECK-LABEL: define { <8 x i1>, <8 x i1> } @LLVM_x86_vp2intersect_q_512 llvm.func @LLVM_x86_vp2intersect_q_512(%a: vector<8xi64>, %b: vector<8xi64>) - -> !llvm.struct + -> !llvm.struct<(vector<8 x i1>, vector<8 x i1>)> { // CHECK: call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %0 = "llvm_avx512.vp2intersect.q.512"(%a, %b) : - (vector<8xi64>, vector<8xi64>) -> !llvm.struct - llvm.return %0 : !llvm.struct + (vector<8xi64>, vector<8xi64>) -> !llvm.struct<(vector<8 x i1>, vector<8 x i1>)> + llvm.return %0 : !llvm.struct<(vector<8 x i1>, vector<8 x i1>)> } diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir --- a/mlir/test/Target/llvmir-intrinsics.mlir +++ b/mlir/test/Target/llvmir-intrinsics.mlir @@ -302,12 +302,13 @@ } // CHECK-LABEL: @memcpy_test -llvm.func @memcpy_test(%arg0: i32, %arg1: i1, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 %{{.*}}) - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg1) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () +llvm.func @memcpy_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { + %i1 = llvm.mlir.constant(false) : i1 + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 {{.*}}) + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %i1) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () %sz = llvm.mlir.constant(10: i64) : i64 - // CHECK: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 10, i1 %{{.*}}) - "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg1) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + // CHECK: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 10, i1 {{.*}}) + "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %i1) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () llvm.return } @@ -368,14 +369,17 @@ // CHECK-LABEL: @coro_id llvm.func @coro_id(%arg0: i32, %arg1: !llvm.ptr) { // CHECK: call token @llvm.coro.id - llvm.intr.coro.id %arg0, %arg1, %arg1, %arg1 : !llvm.token + %null = llvm.mlir.null : !llvm.ptr + llvm.intr.coro.id %arg0, %arg1, %arg1, %null : !llvm.token llvm.return } // CHECK-LABEL: @coro_begin -llvm.func @coro_begin(%arg0: !llvm.token, %arg1: !llvm.ptr) { +llvm.func @coro_begin(%arg0: i32, %arg1: !llvm.ptr) { + %null = llvm.mlir.null : !llvm.ptr + %token = llvm.intr.coro.id %arg0, %arg1, %arg1, %null : !llvm.token // CHECK: call i8* @llvm.coro.begin - llvm.intr.coro.begin %arg0, %arg1 : !llvm.ptr + llvm.intr.coro.begin %token, %arg1 : !llvm.ptr llvm.return } @@ -396,9 +400,11 @@ } // CHECK-LABEL: @coro_suspend -llvm.func @coro_suspend(%arg0: !llvm.token, %arg1 : i1) { +llvm.func @coro_suspend(%arg0: i32, %arg1 : i1, %arg2 : !llvm.ptr) { + %null = llvm.mlir.null : !llvm.ptr + %token = llvm.intr.coro.id %arg0, %arg2, %arg2, %null : !llvm.token // CHECK: call i8 @llvm.coro.suspend - %0 = llvm.intr.coro.suspend %arg0, %arg1 : i8 + %0 = llvm.intr.coro.suspend %token, %arg1 : i8 llvm.return } @@ -410,9 +416,11 @@ } // CHECK-LABEL: @coro_free -llvm.func @coro_free(%arg0: !llvm.token, %arg1 : !llvm.ptr) { +llvm.func @coro_free(%arg0: i32, %arg1 : !llvm.ptr) { + %null = llvm.mlir.null : !llvm.ptr + %token = llvm.intr.coro.id %arg0, %arg1, %arg1, %null : !llvm.token // CHECK: call i8* @llvm.coro.free - %0 = llvm.intr.coro.free %arg0, %arg1 : !llvm.ptr + %0 = llvm.intr.coro.free %token, %arg1 : !llvm.ptr llvm.return } diff --git a/mlir/test/Target/llvmir.mlir b/mlir/test/Target/llvmir.mlir --- a/mlir/test/Target/llvmir.mlir +++ b/mlir/test/Target/llvmir.mlir @@ -46,10 +46,10 @@ llvm.mlir.global linkonce @linkonce(42 : i32) : i32 // CHECK: @weak = weak global i32 42 llvm.mlir.global weak @weak(42 : i32) : i32 -// CHECK: @common = common global i32 42 -llvm.mlir.global common @common(42 : i32) : i32 -// CHECK: @appending = appending global i32 42 -llvm.mlir.global appending @appending(42 : i32) : i32 +// CHECK: @common = common global i32 0 +llvm.mlir.global common @common(0 : i32) : i32 +// CHECK: @appending = appending global [3 x i32] [i32 1, i32 2, i32 3] +llvm.mlir.global appending @appending(dense<[1,2,3]> : vector<3xi32>) : !llvm.array<3xi32> // CHECK: @extern_weak = extern_weak global i32 llvm.mlir.global extern_weak @extern_weak() : i32 // CHECK: @linkonce_odr = linkonce_odr global i32 42 @@ -984,10 +984,10 @@ llvm.return %1 : !llvm.ptr } -llvm.func @stringconstant() -> !llvm.ptr { - %1 = llvm.mlir.constant("Hello world!") : !llvm.ptr +llvm.func @stringconstant() -> !llvm.array<12 x i8> { + %1 = llvm.mlir.constant("Hello world!") : !llvm.array<12 x i8> // CHECK: ret [12 x i8] c"Hello world!" - llvm.return %1 : !llvm.ptr + llvm.return %1 : !llvm.array<12 x i8> } llvm.func @noreach() { @@ -1119,10 +1119,10 @@ llvm.func @atomicrmw( %f32_ptr : !llvm.ptr, %f32 : f32, %i32_ptr : !llvm.ptr, %i32 : i32) { - // CHECK: atomicrmw fadd float* %{{.*}}, float %{{.*}} unordered - %0 = llvm.atomicrmw fadd %f32_ptr, %f32 unordered : f32 - // CHECK: atomicrmw fsub float* %{{.*}}, float %{{.*}} unordered - %1 = llvm.atomicrmw fsub %f32_ptr, %f32 unordered : f32 + // CHECK: atomicrmw fadd float* %{{.*}}, float %{{.*}} monotonic + %0 = llvm.atomicrmw fadd %f32_ptr, %f32 monotonic : f32 + // CHECK: atomicrmw fsub float* %{{.*}}, float %{{.*}} monotonic + %1 = llvm.atomicrmw fsub %f32_ptr, %f32 monotonic : f32 // CHECK: atomicrmw xchg float* %{{.*}}, float %{{.*}} monotonic %2 = llvm.atomicrmw xchg %f32_ptr, %f32 monotonic : f32 // CHECK: atomicrmw add i32* %{{.*}}, i32 %{{.*}} acquire @@ -1133,18 +1133,18 @@ %5 = llvm.atomicrmw _and %i32_ptr, %i32 acq_rel : i32 // CHECK: atomicrmw nand i32* %{{.*}}, i32 %{{.*}} seq_cst %6 = llvm.atomicrmw nand %i32_ptr, %i32 seq_cst : i32 - // CHECK: atomicrmw or i32* %{{.*}}, i32 %{{.*}} unordered - %7 = llvm.atomicrmw _or %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw xor i32* %{{.*}}, i32 %{{.*}} unordered - %8 = llvm.atomicrmw _xor %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw max i32* %{{.*}}, i32 %{{.*}} unordered - %9 = llvm.atomicrmw max %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw min i32* %{{.*}}, i32 %{{.*}} unordered - %10 = llvm.atomicrmw min %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw umax i32* %{{.*}}, i32 %{{.*}} unordered - %11 = llvm.atomicrmw umax %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw umin i32* %{{.*}}, i32 %{{.*}} unordered - %12 = llvm.atomicrmw umin %i32_ptr, %i32 unordered : i32 + // CHECK: atomicrmw or i32* %{{.*}}, i32 %{{.*}} monotonic + %7 = llvm.atomicrmw _or %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw xor i32* %{{.*}}, i32 %{{.*}} monotonic + %8 = llvm.atomicrmw _xor %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw max i32* %{{.*}}, i32 %{{.*}} monotonic + %9 = llvm.atomicrmw max %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw min i32* %{{.*}}, i32 %{{.*}} monotonic + %10 = llvm.atomicrmw min %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw umax i32* %{{.*}}, i32 %{{.*}} monotonic + %11 = llvm.atomicrmw umax %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw umin i32* %{{.*}}, i32 %{{.*}} monotonic + %12 = llvm.atomicrmw umin %i32_ptr, %i32 monotonic : i32 llvm.return } @@ -1168,7 +1168,7 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personality_v0 } { // CHECK: %[[a1:[0-9]+]] = alloca i8 %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.mlir.constant("\01") : !llvm.array<1 x i8> + %1 = llvm.mlir.constant(dense<0> : vector<1xi8>) : !llvm.array<1 x i8> %2 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> %3 = llvm.bitcast %2 : !llvm.ptr> to !llvm.ptr %4 = llvm.mlir.null : !llvm.ptr> @@ -1183,7 +1183,7 @@ // CHECK: %{{[0-9]+}} = landingpad { i8*, i32 } // CHECK-NEXT: catch i8** null // CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) -// CHECK-NEXT: filter [1 x i8] c"\01" +// CHECK-NEXT: filter [1 x i8] zeroinitializer %7 = llvm.landingpad (catch %4 : !llvm.ptr>) (catch %3 : !llvm.ptr) (filter %1 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> // CHECK: br label %[[final:[0-9]+]] llvm.br ^bb3 @@ -1415,7 +1415,7 @@ // ----- // CHECK-LABEL: @switch_args -llvm.func @switch_args(%arg0: i32) { +llvm.func @switch_args(%arg0: i32) -> i32 { %0 = llvm.mlir.constant(5 : i32) : i32 %1 = llvm.mlir.constant(7 : i32) : i32 %2 = llvm.mlir.constant(11 : i32) : i32 @@ -1448,7 +1448,7 @@ } // CHECK-LABEL: @switch_weights -llvm.func @switch_weights(%arg0: i32) { +llvm.func @switch_weights(%arg0: i32) -> i32 { %0 = llvm.mlir.constant(19 : i32) : i32 %1 = llvm.mlir.constant(23 : i32) : i32 %2 = llvm.mlir.constant(29 : i32) : i32 diff --git a/mlir/test/Target/nvvmir.mlir b/mlir/test/Target/nvvmir.mlir --- a/mlir/test/Target/nvvmir.mlir +++ b/mlir/test/Target/nvvmir.mlir @@ -32,7 +32,7 @@ llvm.return %1 : i32 } -llvm.func @llvm.nvvm.barrier0() { +llvm.func @llvm_nvvm_barrier0() { // CHECK: call void @llvm.nvvm.barrier0() nvvm.barrier0 llvm.return @@ -67,7 +67,7 @@ llvm.func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, - %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { + %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> { // CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32 %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> diff --git a/mlir/test/Target/rocdl.mlir b/mlir/test/Target/rocdl.mlir --- a/mlir/test/Target/rocdl.mlir +++ b/mlir/test/Target/rocdl.mlir @@ -43,109 +43,111 @@ } llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32, - %arg2 : vector<32 x f32>, %arg3 : i32, + %arg2 : vector<32 x f32>, %arg3: i32, %arg4 : vector<16 x f32>, %arg5 : vector<4xf32>, %arg6 : vector<4xf16>, %arg7 : vector<32 x i32>, %arg8 : vector<16 x i32>, %arg9 : vector<4xi32>, %arg10 : vector<2xi16>) -> vector<32 x f32> { + %csti32 = llvm.mlir.constant(42 : i32) : i32 + // CHECK-LABEL: rocdl.xdlops - // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 : + // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %csti32, %csti32, %csti32 : (f32, f32, vector<32 x f32>, i32, i32, i32) -> vector<32 x f32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %csti32, %csti32, %csti32 : (f32, f32, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %csti32, %csti32, %csti32 : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %csti32, %csti32, %csti32 : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %csti32, %csti32, %csti32 : (f32, f32, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 : + // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<32 x f32>, i32, i32, i32) -> vector<32 x f32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 : + // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %csti32, %csti32, %csti32 : (i32, i32, vector<32 x i32>, i32, i32, i32) -> vector<32 x i32> - // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : + // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %csti32, %csti32, %csti32 : (i32, i32, vector<16 x i32>, i32, i32, i32) -> vector<16 x i32> - // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : + // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %csti32, %csti32, %csti32 : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : + // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %csti32, %csti32, %csti32 : (i32, i32, vector<16 x i32>, i32, i32, i32) -> vector<16 x i32> - // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : + // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %csti32, %csti32, %csti32 : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 : + // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<32 x f32>, i32, i32, i32) -> vector<32 x f32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> @@ -153,22 +155,23 @@ } llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32, - %offset : i32, %glc : i1, - %slc : i1, %vdata1 : vector<1xf32>, + %offset : i32, %vdata1 : vector<1xf32>, %vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) { + %glc = llvm.mlir.constant(false) : i1 + %slc = llvm.mlir.constant(true) : i1 // CHECK-LABEL: rocdl.mubuf - // CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> - // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> - // CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> - // CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> - // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> - // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> llvm.return