diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
--- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
@@ -32,9 +32,9 @@
 static constexpr llvm::StringRef kPrintClose = "printClose";
 static constexpr llvm::StringRef kPrintComma = "printComma";
 static constexpr llvm::StringRef kPrintNewline = "printNewline";
-static constexpr llvm::StringRef kMalloc = "malloc";
-static constexpr llvm::StringRef kAlignedAlloc = "aligned_alloc";
-static constexpr llvm::StringRef kFree = "free";
+static constexpr llvm::StringRef kMalloc = "_mlir_alloc";
+static constexpr llvm::StringRef kAlignedAlloc = "_mlir_aligned_alloc";
+static constexpr llvm::StringRef kFree = "_mlir_free";
 static constexpr llvm::StringRef kMemRefCopy = "memrefCopy";
 
 /// Generic print function lookupOrCreate helper.
diff --git a/mlir/lib/ExecutionEngine/RunnerUtils.cpp b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
--- a/mlir/lib/ExecutionEngine/RunnerUtils.cpp
+++ b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
@@ -18,6 +18,18 @@
 
 // NOLINTBEGIN(*-identifier-naming)
 
+extern "C" void *_mlir_alloc(uint64_t size) {
+  return malloc(size);
+}
+
+extern "C" void *_mlir_aligned_alloc(uint64_t alignment, uint64_t size) {
+  return aligned_alloc(alignment, size);
+}
+
+extern "C" void _mlir_free(void *ptr) {
+  free(ptr);
+}
+
 extern "C" void _mlir_ciface_printMemrefShapeI8(UnrankedMemRefType<int8_t> *M) {
   std::cout << "Unranked Memref ";
   printMemRefMetaData(std::cout, DynamicMemRefType<int8_t>(*M));
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1118,12 +1118,15 @@
           m->getAttr(LLVM::LLVMDialect::getTargetTripleAttrName()))
     llvmModule->setTargetTriple(targetTripleAttr.cast<StringAttr>().getValue());
 
-  // Inject declarations for `malloc` and `free` functions that can be used in
-  // memref allocation/deallocation coming from standard ops lowering.
+  // Inject declarations for `_mlir_alloc`, `_mlir_aligned_alloc` and
+  // `_mlir_free` functions that can be used in memref allocation/deallocation
+  // coming from standard ops lowering.
   llvm::IRBuilder<> builder(llvmContext);
-  llvmModule->getOrInsertFunction("malloc", builder.getInt8PtrTy(),
+  llvmModule->getOrInsertFunction("_mlir_alloc", builder.getInt8PtrTy(),
                                   builder.getInt64Ty());
-  llvmModule->getOrInsertFunction("free", builder.getVoidTy(),
+  llvmModule->getOrInsertFunction("_mlir_aligned_alloc", builder.getInt8PtrTy(),
+                                  builder.getInt64Ty(), builder.getInt64Ty());
+  llvmModule->getOrInsertFunction("_mlir_free", builder.getVoidTy(),
                                   builder.getInt8PtrTy());
 
   return llvmModule;
diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
--- a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
@@ -21,7 +21,7 @@
   // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
   // CHECK: %[[NEGATED_ALIGN:.*]] = llvm.sub %[[C0]], %[[ALIGN]] : i64
   // CHECK: %[[ROUNDED_SIZE:.*]] = llvm.and %[[SIZE_PLUS_ALIGN_MINUS_ONE]], %[[NEGATED_ALIGN]] : i64
-  // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[ROUNDED_SIZE]])
+  // CHECK: %[[ALLOC:.*]] = llvm.call @_mlir_aligned_alloc(%[[ALIGN]], %[[ROUNDED_SIZE]])
   // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin %[[ID]], %[[ALLOC]]
   %1 = async.coro.begin %0
   return
@@ -34,7 +34,7 @@
   // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin
   %1 = async.coro.begin %0
   // CHECK: %[[MEM:.*]] = llvm.intr.coro.free %[[ID]], %[[HDL]]
-  // CHECK: llvm.call @free(%[[MEM]])
+  // CHECK: llvm.call @_mlir_free(%[[MEM]])
   async.coro.free %0, %1
   return
 }
diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
--- a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
@@ -61,7 +61,7 @@
 // Delete coroutine.
 // CHECK: ^[[CLEANUP]]:
 // CHECK: %[[MEM:.*]] = llvm.intr.coro.free
-// CHECK: llvm.call @free(%[[MEM]])
+// CHECK: llvm.call @_mlir_free(%[[MEM]])
 
 // Suspend coroutine, and also a return statement for ramp function.
 // CHECK: ^[[SUSPEND]]:
diff --git a/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir b/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir
--- a/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir
+++ b/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir
@@ -135,7 +135,7 @@
   // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOC_SIZE]] x i8
   // CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[CALL_RES]][1]
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]])
-  // CHECK: llvm.call @free(%[[SOURCE]])
+  // CHECK: llvm.call @_mlir_free(%[[SOURCE]])
   // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)>
   // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm.struct<(i64, ptr)>
   // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[DESC]][0]
@@ -167,7 +167,7 @@
   // CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]]
   // CHECK: %[[ALLOC_SIZE:.*]] = llvm.add %[[DOUBLE_PTR_SIZE]], %[[TABLES_SIZE]]
   // CHECK: %[[FALSE:.*]] = llvm.mlir.constant(false)
-  // CHECK: %[[ALLOCATED:.*]] = llvm.call @malloc(%[[ALLOC_SIZE]])
+  // CHECK: %[[ALLOCATED:.*]] = llvm.call @_mlir_alloc(%[[ALLOC_SIZE]])
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED]], %[[MEMORY]], %[[ALLOC_SIZE]], %[[FALSE]])
   // CHECK: %[[NEW_DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)>
   // CHECK: %[[NEW_DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[NEW_DESC]][0]
@@ -193,7 +193,7 @@
   // CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %{{.*}} x i8
   // CHECK: %[[SOURCE_1:.*]] = llvm.extractvalue %[[RES_1:.*]][1] : ![[DESC_TYPE:.*]]
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[SOURCE_1]], %{{.*}}, %[[FALSE:.*]])
-  // CHECK: llvm.call @free(%[[SOURCE_1]])
+  // CHECK: llvm.call @_mlir_free(%[[SOURCE_1]])
   // CHECK: %[[DESC_1:.*]] = llvm.mlir.undef : ![[DESC_TYPE]]
   // CHECK: %[[DESC_11:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_1]][0]
   // CHECK: llvm.insertvalue %[[ALLOCA_1]], %[[DESC_11]][1]
@@ -201,7 +201,7 @@
   // CHECK: %[[ALLOCA_2:.*]] = llvm.alloca %{{.*}} x i8
   // CHECK: %[[SOURCE_2:.*]] = llvm.extractvalue %[[RES_2:.*]][1]
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCA_2]], %[[SOURCE_2]], %{{.*}}, %[[FALSE]])
-  // CHECK: llvm.call @free(%[[SOURCE_2]])
+  // CHECK: llvm.call @_mlir_free(%[[SOURCE_2]])
   // CHECK: %[[DESC_2:.*]] = llvm.mlir.undef : ![[DESC_TYPE]]
   // CHECK: %[[DESC_21:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_2]][0]
   // CHECK: llvm.insertvalue %[[ALLOCA_2]], %[[DESC_21]][1]
@@ -222,13 +22,13 @@
   // separately, even if both operands are the same value. The calling
   // convention requires the caller to free them and the caller cannot know
   // whether they are the same value or not.
-  // CHECK: %[[ALLOCATED_1:.*]] = llvm.call @malloc(%{{.*}})
+  // CHECK: %[[ALLOCATED_1:.*]] = llvm.call @_mlir_alloc(%{{.*}})
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_1]], %[[MEMORY]], %{{.*}}, %[[FALSE:.*]])
   // CHECK: %[[RES_1:.*]] = llvm.mlir.undef
   // CHECK: %[[RES_11:.*]] = llvm.insertvalue %{{.*}}, %[[RES_1]][0]
   // CHECK: %[[RES_12:.*]] = llvm.insertvalue %[[ALLOCATED_1]], %[[RES_11]][1]
-  // CHECK: %[[ALLOCATED_2:.*]] = llvm.call @malloc(%{{.*}})
+  // CHECK: %[[ALLOCATED_2:.*]] = llvm.call @_mlir_alloc(%{{.*}})
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_2]], %[[MEMORY]], %{{.*}}, %[[FALSE]])
   // CHECK: %[[RES_2:.*]] = llvm.mlir.undef
   // CHECK: %[[RES_21:.*]] = llvm.insertvalue %{{.*}}, %[[RES_2]][0]
diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
--- a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
@@ -14,7 +14,7 @@
 // CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr
 // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz]]] : (!llvm.ptr, i64) -> !llvm.ptr
 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64
-// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr
+// CHECK-NEXT: llvm.call @_mlir_alloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr
 // CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr
 // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
@@ -37,7 +37,7 @@
 func.func @mixed_dealloc(%arg0: memref<?x42x?xf32>) {
 // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr
-// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> ()
+// CHECK-NEXT: llvm.call @_mlir_free(%[[ptri8]]) : (!llvm.ptr) -> ()
   memref.dealloc %arg0 : memref<?x42x?xf32>
   return
 }
@@ -54,7 +54,7 @@
 // CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr
 // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz]]] : (!llvm.ptr, i64) -> !llvm.ptr
 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64
-// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr
+// CHECK-NEXT: llvm.call @_mlir_alloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr
 // CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr
 // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
@@ -110,7 +110,7 @@
 func.func @dynamic_dealloc(%arg0: memref<?x?xf32>) {
 // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr
-// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> ()
+// CHECK-NEXT: llvm.call @_mlir_free(%[[ptri8]]) : (!llvm.ptr) -> ()
   memref.dealloc %arg0 : memref<?x?xf32>
   return
 }
@@ -128,24 +128,24 @@
 // ALIGNED-ALLOC-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr, i64) -> !llvm.ptr
 // ALIGNED-ALLOC-NEXT: %[[bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64
 // ALIGNED-ALLOC-NEXT: %[[alignment:.*]] = llvm.mlir.constant(32 : index) : i64
-// ALIGNED-ALLOC-NEXT: %[[allocated:.*]] = llvm.call @aligned_alloc(%[[alignment]], %[[bytes]]) : (i64, i64) -> !llvm.ptr
+// ALIGNED-ALLOC-NEXT: %[[allocated:.*]] = llvm.call @_mlir_aligned_alloc(%[[alignment]], %[[bytes]]) : (i64, i64) -> !llvm.ptr
 // ALIGNED-ALLOC-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr
   %0 = memref.alloc() {alignment = 32} : memref<32x18xf32>
   // Do another alloc just to test that we have a unique declaration for
   // aligned_alloc.
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   %1 = memref.alloc() {alignment = 64} : memref<4096xf32>
 
   // Alignment is to element type boundaries (minimum 16 bytes).
   // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : i64
-  // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]]
+  // ALIGNED-ALLOC-NEXT: llvm.call @_mlir_aligned_alloc(%[[c32]]
   %2 = memref.alloc() : memref<4096xvector<8xf32>>
   // The minimum alignment is 16 bytes unless explicitly specified.
   // ALIGNED-ALLOC: %[[c16:.*]] = llvm.mlir.constant(16 : index) : i64
-  // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c16]],
+  // ALIGNED-ALLOC-NEXT: llvm.call @_mlir_aligned_alloc(%[[c16]],
   %3 = memref.alloc() : memref<4096xvector<2xf32>>
   // ALIGNED-ALLOC: %[[c8:.*]] = llvm.mlir.constant(8 : index) : i64
-  // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c8]],
+  // ALIGNED-ALLOC-NEXT: llvm.call @_mlir_aligned_alloc(%[[c8]],
   %4 = memref.alloc() {alignment = 8} : memref<1024xvector<4xf32>>
   // Bump the memref allocation size if its size is not a multiple of alignment.
   // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : i64
@@ -154,11 +154,11 @@
   // ALIGNED-ALLOC-NEXT: llvm.add
   // ALIGNED-ALLOC-NEXT: llvm.urem
   // ALIGNED-ALLOC-NEXT: %[[SIZE_ALIGNED:.*]] = llvm.sub
-  // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]], %[[SIZE_ALIGNED]])
+  // ALIGNED-ALLOC-NEXT: llvm.call @_mlir_aligned_alloc(%[[c32]], %[[SIZE_ALIGNED]])
   %5 = memref.alloc() {alignment = 32} : memref<100xf32>
   // Bump alignment to the next power of two if it isn't.
   // ALIGNED-ALLOC: %[[c128:.*]] = llvm.mlir.constant(128 : index) : i64
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc(%[[c128]]
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc(%[[c128]]
   %6 = memref.alloc(%N) : memref<?xvector<18xf32>>
   return %0 : memref<32x18xf32>
 }
@@ -551,7 +551,7 @@
   // ALIGNED-ALLOC: llvm.mlir.constant(64 : index)
 
   // Check that the types are converted as expected.
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   // ALIGNED-ALLOC: llvm.bitcast %{{.*}} : !llvm.ptr to
   // ALIGNED-ALLOC-SAME: !llvm.
   // ALIGNED-ALLOC-SAME: [[INNER:ptr, ptr, i64, array<1 x i64>, array<1 x i64>\)>>]]
@@ -576,7 +576,7 @@
   // ALIGNED-ALLOC: llvm.mlir.constant(32 : index)
 
   // Check that the types are converted as expected.
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   // ALIGNED-ALLOC: llvm.bitcast %{{.*}} : !llvm.ptr to
   // ALIGNED-ALLOC-SAME: !llvm.
   // ALIGNED-ALLOC-SAME: [[INNER:ptr, ptr, i32, array<1 x i32>, array<1 x i32>\)>>]]
@@ -606,7 +606,7 @@
   // Static alignment should be computed as ceilPowerOf2(2 * sizeof(pointer) +
   // (1 + 2 * rank) * sizeof(index) = ceilPowerOf2(2 * 8 + 3 * 8) = 64.
   // ALIGNED-ALLOC: llvm.mlir.constant(64 : index)
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   %0 = memref.alloc() : memref<1 x memref<2 x memref<3 x f32>>>
   return
 }
@@ -623,7 +623,7 @@
   // Static alignment should be computed as ceilPowerOf2(sizeof(index) +
   // sizeof(pointer)) = 16.
   // ALIGNED-ALLOC: llvm.mlir.constant(16 : index)
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   // ALIGNED-ALLOC: llvm.bitcast
   // ALIGNED-ALLOC-SAME: !llvm.ptr to !llvm.[[INNER]]
   %0 = memref.alloc() : memref<1 x memref<* x f32>>
diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
--- a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
@@ -6,7 +6,7 @@
 // CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr
 // CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, i64) -> !llvm.ptr
 // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64
-// CHECK: llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr
+// CHECK: llvm.call @_mlir_alloc(%[[size_bytes]]) : (i64) -> !llvm.ptr
 // CHECK: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr
 // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)>
 // CHECK: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)>
@@ -26,7 +26,7 @@
 // CHECK: unrealized_conversion_cast
 // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)>
 // CHECK: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr
-// CHECK: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> ()
+// CHECK: llvm.call @_mlir_free(%[[bc]]) : (!llvm.ptr) -> ()
   memref.dealloc %arg0 : memref<f32>
   return
@@ -43,7 +43,7 @@
 // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64
 // CHECK: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : i64
 // CHECK: %[[allocsize:.*]] = llvm.add %[[size_bytes]], %[[alignment]] : i64
-// CHECK: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (i64) -> !llvm.ptr
+// CHECK: %[[allocated:.*]] = llvm.call @_mlir_alloc(%[[allocsize]]) : (i64) -> !llvm.ptr
 // CHECK: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr
 // CHECK: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr to i64
 // CHECK: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : i64
@@ -69,7 +69,7 @@
 // CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr
 // CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr, i64) -> !llvm.ptr
 // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64
-// CHECK: %[[allocated:.*]] = llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr
+// CHECK: %[[allocated:.*]] = llvm.call @_mlir_alloc(%[[size_bytes]]) : (i64) -> !llvm.ptr
 // CHECK: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr
   %0 = memref.alloc() : memref<32x18xf32>
   return %0 : memref<32x18xf32>
@@ -106,7 +106,7 @@
 func.func @static_dealloc(%static: memref<10x8xf32>) {
 // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr
-// CHECK: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> ()
+// CHECK: llvm.call @_mlir_free(%[[bc]]) : (!llvm.ptr) -> ()
   memref.dealloc %static : memref<10x8xf32>
   return
 }
@@ -206,7 +206,7 @@
   // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
   // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
   // CHECK: llvm.add %{{.*}} : i32
-  // CHECK: llvm.call @malloc(%{{.*}}) : (i32) -> !llvm.ptr
+  // CHECK: llvm.call @_mlir_alloc(%{{.*}}) : (i32) -> !llvm.ptr
   // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
   // CHECK: llvm.sub {{.*}} : i32
   // CHECK: llvm.add {{.*}} : i32
diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -147,9 +147,9 @@
 // inserted before other functions in the module.
 //
-// CHECK: declare ptr @malloc(i64)
-llvm.func @malloc(i64) -> !llvm.ptr
-// CHECK: declare void @free(ptr)
+// CHECK: declare ptr @_mlir_alloc(i64)
+llvm.func @_mlir_alloc(i64) -> !llvm.ptr
+// CHECK: declare void @_mlir_free(ptr)
 //
@@ -499,7 +499,7 @@
 // CHECK-LABEL: define void @memref_alloc()
 llvm.func @memref_alloc() {
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 400)
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 400)
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr } undef, ptr %{{[0-9]+}}, 0
   %0 = llvm.mlir.constant(10 : index) : i64
   %1 = llvm.mlir.constant(10 : index) : i64
@@ -507,7 +507,7 @@
   %3 = llvm.mlir.undef : !llvm.struct<(ptr)>
   %4 = llvm.mlir.constant(4 : index) : i64
   %5 = llvm.mul %2, %4 : i64
-  %6 = llvm.call @malloc(%5) : (i64) -> !llvm.ptr
+  %6 = llvm.call @_mlir_alloc(%5) : (i64) -> !llvm.ptr
   %7 = llvm.bitcast %6 : !llvm.ptr to !llvm.ptr
   %8 = llvm.insertvalue %7, %3[0] : !llvm.struct<(ptr)>
 // CHECK-NEXT: ret void
@@ -520,13 +520,13 @@
 // CHECK-LABEL: define void @store_load_static()
 llvm.func @store_load_static() {
 ^bb0:
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 40)
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 40)
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr } undef, ptr %{{[0-9]+}}, 0
   %0 = llvm.mlir.constant(10 : index) : i64
   %1 = llvm.mlir.undef : !llvm.struct<(ptr)>
   %2 = llvm.mlir.constant(4 : index) : i64
   %3 = llvm.mul %0, %2 : i64
-  %4 = llvm.call @malloc(%3) : (i64) -> !llvm.ptr
+  %4 = llvm.call @_mlir_alloc(%3) : (i64) -> !llvm.ptr
   %5 = llvm.bitcast %4 : !llvm.ptr to !llvm.ptr
   %6 = llvm.insertvalue %5, %1[0] : !llvm.struct<(ptr)>
   %7 = llvm.mlir.constant(1.000000e+00 : f32) : f32
@@ -587,13 +587,13 @@
 // CHECK-LABEL: define void @store_load_dynamic(i64 {{%.*}})
 llvm.func @store_load_dynamic(%arg0: i64) {
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 %{{[0-9]+}})
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 %{{[0-9]+}})
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } undef, ptr %{{[0-9]+}}, 0
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1
   %0 = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
   %1 = llvm.mlir.constant(4 : index) : i64
   %2 = llvm.mul %arg0, %1 : i64
-  %3 = llvm.call @malloc(%2) : (i64) -> !llvm.ptr
+  %3 = llvm.call @_mlir_alloc(%2) : (i64) -> !llvm.ptr
   %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr
   %5 = llvm.insertvalue %4, %0[0] : !llvm.struct<(ptr, i64)>
   %6 = llvm.insertvalue %arg0, %5[1] : !llvm.struct<(ptr, i64)>
@@ -660,7 +660,7 @@
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 10
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 %{{[0-9]+}})
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 %{{[0-9]+}})
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64, i64 } undef, ptr %{{[0-9]+}}, 0
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64, i64 } %{{[0-9]+}}, i64 10, 2
@@ -672,7 +672,7 @@
   %6 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i64)>
   %7 = llvm.mlir.constant(4 : index) : i64
   %8 = llvm.mul %5, %7 : i64
-  %9 = llvm.call @malloc(%8) : (i64) -> !llvm.ptr
+  %9 = llvm.call @_mlir_alloc(%8) : (i64) -> !llvm.ptr
   %10 = llvm.bitcast %9 : !llvm.ptr to !llvm.ptr
   %11 = llvm.insertvalue %10, %6[0] : !llvm.struct<(ptr, i64, i64)>
   %12 = llvm.insertvalue %arg0, %11[1] : !llvm.struct<(ptr, i64, i64)>
@@ -773,7 +773,7 @@
   llvm.store %2, %14 : !llvm.ptr
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 10, %{{[0-9]+}}
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 %{{[0-9]+}})
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 %{{[0-9]+}})
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } undef, ptr %{{[0-9]+}}, 0
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1
   %15 = llvm.mlir.constant(10 : index) : i64
   %17 = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
   %18 = llvm.mlir.constant(4 : index) : i64
   %19 = llvm.mul %16, %18 : i64
-  %20 = llvm.call @malloc(%19) : (i64) -> !llvm.ptr
+  %20 = llvm.call @_mlir_alloc(%19) : (i64) -> !llvm.ptr
   %21 = llvm.bitcast %20 : !llvm.ptr to !llvm.ptr
   %22 = llvm.insertvalue %21, %17[0] : !llvm.struct<(ptr, i64)>
   %23 = llvm.insertvalue %1, %22[1] : !llvm.struct<(ptr, i64)>
diff --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
--- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
+++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
@@ -1,4 +1,6 @@
-// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void \
+// RUN: | FileCheck %s
 
 // Verify bare pointer memref calling convention. `simple_add1_add2_test`
 // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second
@@ -26,8 +28,8 @@
 }
 
 // External declarations.
-llvm.func @malloc(i64) -> !llvm.ptr
-llvm.func @free(!llvm.ptr)
+llvm.func @_mlir_alloc(i64) -> !llvm.ptr
+llvm.func @_mlir_free(!llvm.ptr)
 func.func private @printF32(%arg0: f32)
 func.func private @printComma()
 func.func private @printNewline()
diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -1,4 +1,6 @@
-// RUN: mlir-opt -pass-pipeline="func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s \
+// RUN: | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
 
 func.func @main() {
   %A = memref.alloc() : memref<16x16xf32>
diff --git a/mlir/test/mlir-cpu-runner/simple.mlir b/mlir/test/mlir-cpu-runner/simple.mlir
--- a/mlir/test/mlir-cpu-runner/simple.mlir
+++ b/mlir/test/mlir-cpu-runner/simple.mlir
@@ -1,22 +1,36 @@
-// RUN: mlir-cpu-runner %s | FileCheck %s
-// RUN: mlir-cpu-runner %s -e foo | FileCheck -check-prefix=NOMAIN %s
-// RUN: mlir-cpu-runner %s --entry-point-result=i32 -e int32_main | FileCheck -check-prefix=INT32MAIN %s
-// RUN: mlir-cpu-runner %s --entry-point-result=i64 -e int64_main | FileCheck -check-prefix=INT64MAIN %s
-// RUN: mlir-cpu-runner %s -O3 | FileCheck %s
+// RUN: mlir-cpu-runner %s \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-cpu-runner %s -e foo \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck -check-prefix=NOMAIN %s
+
+// RUN: mlir-cpu-runner %s --entry-point-result=i32 -e int32_main \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck -check-prefix=INT32MAIN %s
+
+// RUN: mlir-cpu-runner %s --entry-point-result=i64 -e int64_main \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck -check-prefix=INT64MAIN %s
+
+// RUN: mlir-cpu-runner %s -O3 \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
 
 // RUN: cp %s %t
-// RUN: mlir-cpu-runner %t -dump-object-file | FileCheck %t
+// RUN: mlir-cpu-runner %t -dump-object-file -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %t
 // RUN: ls %t.o
 // RUN: rm %t.o
-// RUN: mlir-cpu-runner %s -dump-object-file -object-filename=%T/test.o | FileCheck %s
+// RUN: mlir-cpu-runner %s -dump-object-file -object-filename=%T/test.o -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 // RUN: ls %T/test.o
 // RUN: rm %T/test.o
 
 // Declarations of C library functions.
 llvm.func @fabsf(f32) -> f32
-llvm.func @malloc(i64) -> !llvm.ptr
-llvm.func @free(!llvm.ptr)
+llvm.func @_mlir_alloc(i64) -> !llvm.ptr
+llvm.func @_mlir_free(!llvm.ptr)
 
 // Check that a simple function with a nested call works.
 llvm.func @main() -> f32 {
@@ -26,16 +40,16 @@
 }
 // CHECK: 4.200000e+02
 
-// Helper typed functions wrapping calls to "malloc" and "free".
+// Helper typed functions wrapping calls to "_mlir_alloc" and "_mlir_free".
 llvm.func @allocation() -> !llvm.ptr {
   %0 = llvm.mlir.constant(4 : index) : i64
-  %1 = llvm.call @malloc(%0) : (i64) -> !llvm.ptr
+  %1 = llvm.call @_mlir_alloc(%0) : (i64) -> !llvm.ptr
   %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
   llvm.return %2 : !llvm.ptr
 }
 
 llvm.func @deallocation(%arg0: !llvm.ptr) {
   %0 = llvm.bitcast %arg0 : !llvm.ptr to !llvm.ptr
-  llvm.call @free(%0) : (!llvm.ptr) -> ()
+  llvm.call @_mlir_free(%0) : (!llvm.ptr) -> ()
   llvm.return
 }
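---

Note on runtime interposition: because the lowering now emits calls to
`_mlir_alloc`, `_mlir_aligned_alloc` and `_mlir_free` instead of the libc
symbols, a host runtime can swap in its own allocator without shadowing
`malloc`/`free` for the whole process. A minimal sketch of such an override,
assuming only the three symbol names and signatures introduced by this patch
(the tracing is illustrative and hypothetical, not part of the patch):

  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>

  // Definitions with these names take precedence over the default forwarding
  // wrappers in RunnerUtils.cpp when they are resolved first by the
  // JIT/linker; here they trace every allocation made by generated code.
  extern "C" void *_mlir_alloc(uint64_t size) {
    void *ptr = malloc(size);
    std::fprintf(stderr, "_mlir_alloc(%llu) -> %p\n",
                 static_cast<unsigned long long>(size), ptr);
    return ptr;
  }

  extern "C" void *_mlir_aligned_alloc(uint64_t alignment, uint64_t size) {
    // aligned_alloc requires `size` to be a multiple of `alignment`; the
    // MemRefToLLVM lowering rounds the size up before calling, as checked in
    // convert-dynamic-memref-ops.mlir above.
    return aligned_alloc(alignment, size);
  }

  extern "C" void _mlir_free(void *ptr) {
    std::fprintf(stderr, "_mlir_free(%p)\n", ptr);
    free(ptr);
  }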