diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -654,6 +654,7 @@
   let parser = [{ return parseReturnOp(parser, result); }];
   let printer = [{ printReturnOp(p, *this); }];
+  let verifier = [{ return ::verify(*this); }];
 }
 
 def LLVM_ResumeOp : LLVM_TerminatorOp<"resume", []> {
   let arguments = (ins LLVM_Type:$value);
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -1116,10 +1116,10 @@
 }
 
 //===----------------------------------------------------------------------===//
-// Printing/parsing for LLVM::ReturnOp.
+// Printing, parsing and verification for LLVM::ReturnOp.
 //===----------------------------------------------------------------------===//
 
-static void printReturnOp(OpAsmPrinter &p, ReturnOp &op) {
+static void printReturnOp(OpAsmPrinter &p, ReturnOp op) {
   p << op.getOperationName();
   p.printOptionalAttrDict(op.getAttrs());
   assert(op.getNumOperands() <= 1);
@@ -1148,6 +1148,35 @@
   return success();
 }
 
+static LogicalResult verify(ReturnOp op) {
+  if (op->getNumOperands() > 1)
+    return op->emitOpError("expected at most 1 operand");
+
+  if (auto parent = op->getParentOfType<LLVMFuncOp>()) {
+    Type expectedType = parent.getType().getReturnType();
+    if (expectedType.isa<LLVMVoidType>()) {
+      if (op->getNumOperands() == 0)
+        return success();
+      InFlightDiagnostic diag = op->emitOpError("expected no operands");
+      diag.attachNote(parent->getLoc()) << "when returning from function";
+      return diag;
+    }
+    if (op->getNumOperands() == 0) {
+      if (expectedType.isa<LLVMVoidType>())
+        return success();
+      InFlightDiagnostic diag = op->emitOpError("expected 1 operand");
+      diag.attachNote(parent->getLoc()) << "when returning from function";
+      return diag;
+    }
+    if (expectedType != op->getOperand(0).getType()) {
+      InFlightDiagnostic diag = op->emitOpError("mismatching result types");
+      diag.attachNote(parent->getLoc()) << "when returning from function";
+      return diag;
+    }
+  }
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // Verifier for LLVM::AddressOfOp.
 //===----------------------------------------------------------------------===//
@@ -1528,6 +1557,20 @@
   return success();
 }
 
+static bool isZeroAttribute(Attribute value) {
+  if (auto intValue = value.dyn_cast<IntegerAttr>())
+    return intValue.getValue().isNullValue();
+  if (auto fpValue = value.dyn_cast<FloatAttr>())
+    return fpValue.getValue().isZero();
+  if (auto splatValue = value.dyn_cast<SplatElementsAttr>())
+    return isZeroAttribute(splatValue.getSplatValue());
+  if (auto elementsValue = value.dyn_cast<ElementsAttr>())
+    return llvm::all_of(elementsValue.getValues<Attribute>(), isZeroAttribute);
+  if (auto arrayValue = value.dyn_cast<ArrayAttr>())
+    return llvm::all_of(arrayValue.getValue(), isZeroAttribute);
+  return false;
+}
+
 static LogicalResult verify(GlobalOp op) {
   if (!LLVMPointerType::isValidElementType(op.getType()))
     return op.emitOpError(
@@ -1558,6 +1601,25 @@
     if (op.getValueOrNull())
       return op.emitOpError("cannot have both initializer value and region");
   }
+
+  if (op.linkage() == Linkage::Common) {
+    if (Attribute value = op.getValueOrNull()) {
+      if (!isZeroAttribute(value)) {
+        return op.emitOpError()
+               << "expected zero value for '"
+               << stringifyLinkage(Linkage::Common) << "' linkage";
+      }
+    }
+  }
+
+  if (op.linkage() == Linkage::Appending) {
+    if (!op.getType().isa<LLVMArrayType>()) {
+      return op.emitOpError()
+             << "expected array type for '"
+             << stringifyLinkage(Linkage::Appending) << "' linkage";
+    }
+  }
+
   return success();
 }
 
@@ -1840,8 +1902,17 @@
 //===----------------------------------------------------------------------===//
 
 static LogicalResult verify(LLVM::ConstantOp op) {
-  if (!(op.value().isa<IntegerAttr>() || op.value().isa<FloatAttr>() ||
-        op.value().isa<StringAttr>() || op.value().isa<ElementsAttr>()))
+  if (StringAttr sAttr = op.value().dyn_cast<StringAttr>()) {
+    auto arrayType
+        = op.getType().dyn_cast<LLVMArrayType>();
+    if (!arrayType || arrayType.getNumElements() != sAttr.getValue().size() ||
+        !arrayType.getElementType().isInteger(8)) {
+      return op->emitOpError()
+             << "expected array type of " << sAttr.getValue().size()
+             << " i8 elements for the string constant";
+    }
+    return success();
+  }
+  if (!op.value().isa<IntegerAttr, FloatAttr, ElementsAttr>())
     return op.emitOpError()
            << "only supports integer, float, string or elements attributes";
   return success();
 }
@@ -1964,6 +2035,14 @@
       intBitWidth != 64)
     return op.emitOpError("expected LLVM IR integer type");
   }
+
+  if (static_cast<unsigned>(op.ordering()) <
+      static_cast<unsigned>(AtomicOrdering::monotonic))
+    return op.emitOpError()
+           << "expected at least '"
+           << stringifyAtomicOrdering(AtomicOrdering::monotonic)
+           << "' ordering";
+
   return success();
 }
diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir
--- a/mlir/test/Dialect/LLVMIR/global.mlir
+++ b/mlir/test/Dialect/LLVMIR/global.mlir
@@ -44,7 +44,7 @@
 // CHECK: llvm.mlir.global common
 llvm.mlir.global common @common() : i64
 // CHECK: llvm.mlir.global appending
-llvm.mlir.global appending @appending() : i64
+llvm.mlir.global appending @appending() : !llvm.array<2 x i64>
 // CHECK: llvm.mlir.global extern_weak
 llvm.mlir.global extern_weak @extern_weak() : i64
 // CHECK: llvm.mlir.global linkonce_odr
diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir
--- a/mlir/test/Dialect/LLVMIR/invalid.mlir
+++ b/mlir/test/Dialect/LLVMIR/invalid.mlir
@@ -222,6 +222,30 @@
 // -----
 
+llvm.func @void_func_result(%arg0: i32) {
+  // expected-error@below {{expected no operands}}
+  // expected-note@above {{when returning from function}}
+  llvm.return %arg0: i32
+}
+
+// -----
+
+llvm.func @non_void_func_no_result() -> i32 {
+  // expected-error@below {{expected 1 operand}}
+  // expected-note@above {{when returning from function}}
+  llvm.return
+}
+
+// -----
+
+llvm.func @func_result_mismatch(%arg0: f32) -> i32 {
+  // expected-error@below {{mismatching
result types}}
+  // expected-note@above {{when returning from function}}
+  llvm.return %arg0 : f32
+}
+
+// -----
+
 func @constant_wrong_type() {
   // expected-error@+1 {{only supports integer, float, string or elements attributes}}
   llvm.mlir.constant(@constant_wrong_type) : !llvm.ptr<func<void ()>>
 }
 
@@ -229,6 +253,13 @@
 // -----
 
+func @constant_wrong_type_string() {
+  // expected-error@below {{expected array type of 3 i8 elements for the string constant}}
+  llvm.mlir.constant("foo") : !llvm.ptr<i8>
+}
+
+// -----
+
 func @insertvalue_non_llvm_type(%a : i32, %b : i32) {
   // expected-error@+1 {{expected LLVM IR Dialect type}}
   llvm.insertvalue %a, %b[0] : tensor<*xi32>
@@ -561,7 +592,7 @@
 llvm.func @foo(i32) -> i32
 llvm.func @__gxx_personality_v0(...) -> i32
 
-llvm.func @bad_landingpad(%arg0: !llvm.ptr<ptr<i8>>) attributes { personality = @__gxx_personality_v0} {
+llvm.func @bad_landingpad(%arg0: !llvm.ptr<ptr<i8>>) -> i32 attributes { personality = @__gxx_personality_v0} {
   %0 = llvm.mlir.constant(3 : i32) : i32
   %1 = llvm.mlir.constant(2 : i32) : i32
   %2 = llvm.invoke @foo(%1) to ^bb1 unwind ^bb2 : (i32) -> i32
@@ -667,3 +698,18 @@
 ^bb2(%1: i32, %2: i32): // pred: ^bb0
   llvm.return
 }
+
+// -----
+
+// expected-error@below {{expected zero value for 'common' linkage}}
+llvm.mlir.global common @non_zero_global_common_linkage(42 : i32) : i32
+
+// -----
+
+// expected-error@below {{expected zero value for 'common' linkage}}
+llvm.mlir.global common @non_zero_compound_global_common_linkage(dense<[0, 0, 0, 1, 0]> : vector<5xi32>) : !llvm.array<5 x i32>
+
+// -----
+
+// expected-error@below {{expected array type for 'appending' linkage}}
+llvm.mlir.global appending @non_array_type_global_appending_linkage() : i32
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -284,8 +284,8 @@
 // CHECK-LABEL: @atomicrmw
 func @atomicrmw(%ptr : !llvm.ptr<f32>, %val : f32) {
-  // CHECK:
llvm.atomicrmw fadd %{{.*}}, %{{.*}} unordered : f32 - %0 = llvm.atomicrmw fadd %ptr, %val unordered : f32 + // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} monotonic : f32 + %0 = llvm.atomicrmw fadd %ptr, %val monotonic : f32 llvm.return } diff --git a/mlir/test/Target/avx512.mlir b/mlir/test/Target/avx512.mlir --- a/mlir/test/Target/avx512.mlir +++ b/mlir/test/Target/avx512.mlir @@ -2,10 +2,10 @@ // CHECK-LABEL: define <16 x float> @LLVM_x86_avx512_mask_ps_512 llvm.func @LLVM_x86_avx512_mask_ps_512(%a: vector<16 x f32>, - %b: i32, %c: i16) -> (vector<16 x f32>) { + %b = llvm.mlir.constant(42 : i32) : i32 // CHECK: call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %0 = "llvm_avx512.mask.rndscale.ps.512"(%a, %b, %a, %c, %b) : (vector<16 x f32>, i32, vector<16 x f32>, i16, i32) -> vector<16 x f32> @@ -17,10 +17,10 @@ // CHECK-LABEL: define <8 x double> @LLVM_x86_avx512_mask_pd_512 llvm.func @LLVM_x86_avx512_mask_pd_512(%a: vector<8xf64>, - %b: i32, %c: i8) -> (vector<8xf64>) { + %b = llvm.mlir.constant(42 : i32) : i32 // CHECK: call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %0 = "llvm_avx512.mask.rndscale.pd.512"(%a, %b, %a, %c, %b) : (vector<8xf64>, i32, vector<8xf64>, i8, i32) -> vector<8xf64> @@ -30,22 +30,22 @@ llvm.return %1: vector<8xf64> } -// CHECK-LABEL: define <{ i16, i16 }> @LLVM_x86_vp2intersect_d_512 +// CHECK-LABEL: define { <16 x i1>, <16 x i1> } @LLVM_x86_vp2intersect_d_512 llvm.func @LLVM_x86_vp2intersect_d_512(%a: vector<16xi32>, %b: vector<16xi32>) - -> !llvm.struct + -> !llvm.struct<(vector<16 x i1>, vector<16 x i1>)> { // CHECK: call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %0 = "llvm_avx512.vp2intersect.d.512"(%a, %b) : - (vector<16xi32>, vector<16xi32>) -> !llvm.struct - llvm.return %0 : !llvm.struct + (vector<16xi32>, vector<16xi32>) -> !llvm.struct<(vector<16 x i1>, vector<16 x i1>)> + llvm.return %0 : !llvm.struct<(vector<16 x i1>, vector<16 x i1>)> } -// CHECK-LABEL: 
define <{ i8, i8 }> @LLVM_x86_vp2intersect_q_512 +// CHECK-LABEL: define { <8 x i1>, <8 x i1> } @LLVM_x86_vp2intersect_q_512 llvm.func @LLVM_x86_vp2intersect_q_512(%a: vector<8xi64>, %b: vector<8xi64>) - -> !llvm.struct + -> !llvm.struct<(vector<8 x i1>, vector<8 x i1>)> { // CHECK: call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %0 = "llvm_avx512.vp2intersect.q.512"(%a, %b) : - (vector<8xi64>, vector<8xi64>) -> !llvm.struct - llvm.return %0 : !llvm.struct + (vector<8xi64>, vector<8xi64>) -> !llvm.struct<(vector<8 x i1>, vector<8 x i1>)> + llvm.return %0 : !llvm.struct<(vector<8 x i1>, vector<8 x i1>)> } diff --git a/mlir/test/Target/import.ll b/mlir/test/Target/import.ll --- a/mlir/test/Target/import.ll +++ b/mlir/test/Target/import.ll @@ -36,10 +36,10 @@ @linkonce = linkonce global i32 42 ; CHECK: llvm.mlir.global weak @weak(42 : i32) : i32 @weak = weak global i32 42 -; CHECK: llvm.mlir.global common @common(42 : i32) : i32 -@common = common global i32 42 -; CHECK: llvm.mlir.global appending @appending(42 : i32) : i32 -@appending = appending global i32 42 +; CHECK: llvm.mlir.global common @common(0 : i32) : i32 +@common = common global i32 zeroinitializer +; CHECK: llvm.mlir.global appending @appending(dense<[0, 1]> : tensor<2xi32>) : !llvm.array<2 x i32> +@appending = appending global [2 x i32] [i32 0, i32 1] ; CHECK: llvm.mlir.global extern_weak @extern_weak() : i32 @extern_weak = extern_weak global i32 ; CHECK: llvm.mlir.global linkonce_odr @linkonce_odr(42 : i32) : i32 diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir --- a/mlir/test/Target/llvmir-intrinsics.mlir +++ b/mlir/test/Target/llvmir-intrinsics.mlir @@ -302,12 +302,13 @@ } // CHECK-LABEL: @memcpy_test -llvm.func @memcpy_test(%arg0: i32, %arg1: i1, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 %{{.*}}) - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg1) 
: (!llvm.ptr, !llvm.ptr, i32, i1) -> () +llvm.func @memcpy_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { + %i1 = llvm.mlir.constant(false) : i1 + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 {{.*}}) + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %i1) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () %sz = llvm.mlir.constant(10: i64) : i64 - // CHECK: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 10, i1 %{{.*}}) - "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg1) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + // CHECK: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 10, i1 {{.*}}) + "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %i1) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () llvm.return } @@ -368,14 +369,17 @@ // CHECK-LABEL: @coro_id llvm.func @coro_id(%arg0: i32, %arg1: !llvm.ptr) { // CHECK: call token @llvm.coro.id - llvm.intr.coro.id %arg0, %arg1, %arg1, %arg1 : !llvm.token + %null = llvm.mlir.null : !llvm.ptr + llvm.intr.coro.id %arg0, %arg1, %arg1, %null : !llvm.token llvm.return } // CHECK-LABEL: @coro_begin -llvm.func @coro_begin(%arg0: !llvm.token, %arg1: !llvm.ptr) { +llvm.func @coro_begin(%arg0: i32, %arg1: !llvm.ptr) { + %null = llvm.mlir.null : !llvm.ptr + %token = llvm.intr.coro.id %arg0, %arg1, %arg1, %null : !llvm.token // CHECK: call i8* @llvm.coro.begin - llvm.intr.coro.begin %arg0, %arg1 : !llvm.ptr + llvm.intr.coro.begin %token, %arg1 : !llvm.ptr llvm.return } @@ -396,9 +400,11 @@ } // CHECK-LABEL: @coro_suspend -llvm.func @coro_suspend(%arg0: !llvm.token, %arg1 : i1) { +llvm.func @coro_suspend(%arg0: i32, %arg1 : i1, %arg2 : !llvm.ptr) { + %null = llvm.mlir.null : !llvm.ptr + %token = llvm.intr.coro.id %arg0, %arg2, %arg2, %null : !llvm.token // CHECK: call i8 @llvm.coro.suspend - %0 = llvm.intr.coro.suspend %arg0, %arg1 : i8 + %0 = llvm.intr.coro.suspend %token, %arg1 : i8 llvm.return } @@ -410,9 +416,11 @@ } // CHECK-LABEL: @coro_free -llvm.func 
@coro_free(%arg0: !llvm.token, %arg1 : !llvm.ptr) { +llvm.func @coro_free(%arg0: i32, %arg1 : !llvm.ptr) { + %null = llvm.mlir.null : !llvm.ptr + %token = llvm.intr.coro.id %arg0, %arg1, %arg1, %null : !llvm.token // CHECK: call i8* @llvm.coro.free - %0 = llvm.intr.coro.free %arg0, %arg1 : !llvm.ptr + %0 = llvm.intr.coro.free %token, %arg1 : !llvm.ptr llvm.return } diff --git a/mlir/test/Target/llvmir.mlir b/mlir/test/Target/llvmir.mlir --- a/mlir/test/Target/llvmir.mlir +++ b/mlir/test/Target/llvmir.mlir @@ -46,10 +46,10 @@ llvm.mlir.global linkonce @linkonce(42 : i32) : i32 // CHECK: @weak = weak global i32 42 llvm.mlir.global weak @weak(42 : i32) : i32 -// CHECK: @common = common global i32 42 -llvm.mlir.global common @common(42 : i32) : i32 -// CHECK: @appending = appending global i32 42 -llvm.mlir.global appending @appending(42 : i32) : i32 +// CHECK: @common = common global i32 0 +llvm.mlir.global common @common(0 : i32) : i32 +// CHECK: @appending = appending global [3 x i32] [i32 1, i32 2, i32 3] +llvm.mlir.global appending @appending(dense<[1,2,3]> : vector<3xi32>) : !llvm.array<3xi32> // CHECK: @extern_weak = extern_weak global i32 llvm.mlir.global extern_weak @extern_weak() : i32 // CHECK: @linkonce_odr = linkonce_odr global i32 42 @@ -984,10 +984,10 @@ llvm.return %1 : !llvm.ptr } -llvm.func @stringconstant() -> !llvm.ptr { - %1 = llvm.mlir.constant("Hello world!") : !llvm.ptr +llvm.func @stringconstant() -> !llvm.array<12 x i8> { + %1 = llvm.mlir.constant("Hello world!") : !llvm.array<12 x i8> // CHECK: ret [12 x i8] c"Hello world!" 
- llvm.return %1 : !llvm.ptr + llvm.return %1 : !llvm.array<12 x i8> } llvm.func @noreach() { @@ -1119,10 +1119,10 @@ llvm.func @atomicrmw( %f32_ptr : !llvm.ptr, %f32 : f32, %i32_ptr : !llvm.ptr, %i32 : i32) { - // CHECK: atomicrmw fadd float* %{{.*}}, float %{{.*}} unordered - %0 = llvm.atomicrmw fadd %f32_ptr, %f32 unordered : f32 - // CHECK: atomicrmw fsub float* %{{.*}}, float %{{.*}} unordered - %1 = llvm.atomicrmw fsub %f32_ptr, %f32 unordered : f32 + // CHECK: atomicrmw fadd float* %{{.*}}, float %{{.*}} monotonic + %0 = llvm.atomicrmw fadd %f32_ptr, %f32 monotonic : f32 + // CHECK: atomicrmw fsub float* %{{.*}}, float %{{.*}} monotonic + %1 = llvm.atomicrmw fsub %f32_ptr, %f32 monotonic : f32 // CHECK: atomicrmw xchg float* %{{.*}}, float %{{.*}} monotonic %2 = llvm.atomicrmw xchg %f32_ptr, %f32 monotonic : f32 // CHECK: atomicrmw add i32* %{{.*}}, i32 %{{.*}} acquire @@ -1133,18 +1133,18 @@ %5 = llvm.atomicrmw _and %i32_ptr, %i32 acq_rel : i32 // CHECK: atomicrmw nand i32* %{{.*}}, i32 %{{.*}} seq_cst %6 = llvm.atomicrmw nand %i32_ptr, %i32 seq_cst : i32 - // CHECK: atomicrmw or i32* %{{.*}}, i32 %{{.*}} unordered - %7 = llvm.atomicrmw _or %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw xor i32* %{{.*}}, i32 %{{.*}} unordered - %8 = llvm.atomicrmw _xor %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw max i32* %{{.*}}, i32 %{{.*}} unordered - %9 = llvm.atomicrmw max %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw min i32* %{{.*}}, i32 %{{.*}} unordered - %10 = llvm.atomicrmw min %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw umax i32* %{{.*}}, i32 %{{.*}} unordered - %11 = llvm.atomicrmw umax %i32_ptr, %i32 unordered : i32 - // CHECK: atomicrmw umin i32* %{{.*}}, i32 %{{.*}} unordered - %12 = llvm.atomicrmw umin %i32_ptr, %i32 unordered : i32 + // CHECK: atomicrmw or i32* %{{.*}}, i32 %{{.*}} monotonic + %7 = llvm.atomicrmw _or %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw xor i32* %{{.*}}, i32 %{{.*}} monotonic + %8 = 
llvm.atomicrmw _xor %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw max i32* %{{.*}}, i32 %{{.*}} monotonic + %9 = llvm.atomicrmw max %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw min i32* %{{.*}}, i32 %{{.*}} monotonic + %10 = llvm.atomicrmw min %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw umax i32* %{{.*}}, i32 %{{.*}} monotonic + %11 = llvm.atomicrmw umax %i32_ptr, %i32 monotonic : i32 + // CHECK: atomicrmw umin i32* %{{.*}}, i32 %{{.*}} monotonic + %12 = llvm.atomicrmw umin %i32_ptr, %i32 monotonic : i32 llvm.return } @@ -1168,7 +1168,7 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personality_v0 } { // CHECK: %[[a1:[0-9]+]] = alloca i8 %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.mlir.constant("\01") : !llvm.array<1 x i8> + %1 = llvm.mlir.constant(dense<0> : vector<1xi8>) : !llvm.array<1 x i8> %2 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> %3 = llvm.bitcast %2 : !llvm.ptr> to !llvm.ptr %4 = llvm.mlir.null : !llvm.ptr> @@ -1183,7 +1183,7 @@ // CHECK: %{{[0-9]+}} = landingpad { i8*, i32 } // CHECK-NEXT: catch i8** null // CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) -// CHECK-NEXT: filter [1 x i8] c"\01" +// CHECK-NEXT: filter [1 x i8] zeroinitializer %7 = llvm.landingpad (catch %4 : !llvm.ptr>) (catch %3 : !llvm.ptr) (filter %1 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> // CHECK: br label %[[final:[0-9]+]] llvm.br ^bb3 @@ -1415,7 +1415,7 @@ // ----- // CHECK-LABEL: @switch_args -llvm.func @switch_args(%arg0: i32) { +llvm.func @switch_args(%arg0: i32) -> i32 { %0 = llvm.mlir.constant(5 : i32) : i32 %1 = llvm.mlir.constant(7 : i32) : i32 %2 = llvm.mlir.constant(11 : i32) : i32 @@ -1448,7 +1448,7 @@ } // CHECK-LABEL: @switch_weights -llvm.func @switch_weights(%arg0: i32) { +llvm.func @switch_weights(%arg0: i32) -> i32 { %0 = llvm.mlir.constant(19 : i32) : i32 %1 = llvm.mlir.constant(23 : i32) : i32 %2 = llvm.mlir.constant(29 : i32) : i32 diff --git a/mlir/test/Target/nvvmir.mlir 
b/mlir/test/Target/nvvmir.mlir --- a/mlir/test/Target/nvvmir.mlir +++ b/mlir/test/Target/nvvmir.mlir @@ -32,7 +32,7 @@ llvm.return %1 : i32 } -llvm.func @llvm.nvvm.barrier0() { +llvm.func @llvm_nvvm_barrier0() { // CHECK: call void @llvm.nvvm.barrier0() nvvm.barrier0 llvm.return @@ -67,7 +67,7 @@ llvm.func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, - %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { + %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> { // CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32 %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> diff --git a/mlir/test/Target/rocdl.mlir b/mlir/test/Target/rocdl.mlir --- a/mlir/test/Target/rocdl.mlir +++ b/mlir/test/Target/rocdl.mlir @@ -43,109 +43,111 @@ } llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32, - %arg2 : vector<32 x f32>, %arg3 : i32, + %arg2 : vector<32 x f32>, %arg3: i32, %arg4 : vector<16 x f32>, %arg5 : vector<4xf32>, %arg6 : vector<4xf16>, %arg7 : vector<32 x i32>, %arg8 : vector<16 x i32>, %arg9 : vector<4xi32>, %arg10 : vector<2xi16>) -> vector<32 x f32> { + %csti32 = llvm.mlir.constant(42 : i32) : i32 + // CHECK-LABEL: rocdl.xdlops - // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 : + // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}) + %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %csti32, %csti32, %csti32 : (f32, f32, vector<32 x f32>, i32, i32, i32) -> vector<32 x f32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %csti32, %csti32, %csti32 : (f32, f32, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %csti32, %csti32, %csti32 : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %csti32, %csti32, %csti32 : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> 
@llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %csti32, %csti32, %csti32 : (f32, f32, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 : + // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<32 x f32>, i32, i32, i32) -> vector<32 x f32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <16 x float> 
@llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %csti32, %csti32, %csti32 : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 : + // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %csti32, %csti32, %csti32 : (i32, i32, vector<32 x i32>, i32, i32, i32) -> vector<32 x i32> - // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : + // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r11 = 
rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %csti32, %csti32, %csti32 : (i32, i32, vector<16 x i32>, i32, i32, i32) -> vector<16 x i32> - // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : + // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %csti32, %csti32, %csti32 : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : + // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %csti32, %csti32, %csti32 : (i32, i32, vector<16 x i32>, i32, i32, i32) -> vector<16 x i32> - // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : + // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %csti32, %csti32, %csti32 : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 : + // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x 
float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<32 x f32>, i32, i32, i32) -> vector<32 x f32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : + // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<16 x f32>, i32, i32, i32) -> vector<16 x f32> - // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x 
i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : + // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) + %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %csti32, %csti32, %csti32 : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> @@ -153,22 +155,23 @@ } llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32, - %offset : i32, %glc : i1, - %slc : i1, %vdata1 : vector<1xf32>, + %offset : i32, %vdata1 : vector<1xf32>, %vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) { + %glc = llvm.mlir.constant(false) : i1 + %slc = llvm.mlir.constant(true) : i1 // CHECK-LABEL: rocdl.mubuf - // CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> - // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> - // CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> - // CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // 
CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32> - // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32> - // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}}) + // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}}) rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32> llvm.return