diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -547,6 +547,17 @@
   if (T.isNVPTX()) {
     TLI.disableAllFunctions();
     TLI.setAvailable(LibFunc_nvvm_reflect);
+    // Keep malloc and free available so they are recognized and optimized.
+    TLI.setAvailable(LibFunc_malloc);
+    TLI.setAvailable(LibFunc_free);
+
+    // TODO: According to [0] memcpy and memset could be enabled as well, but
+    // the performance implications have not been evaluated yet.
+    // [0]
+    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations
+    //
+    //   TLI.setAvailable(LibFunc_memcpy);
+    //   TLI.setAvailable(LibFunc_memset);
   } else {
     TLI.setUnavailable(LibFunc_nvvm_reflect);
   }
diff --git a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
--- a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
+++ b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
@@ -29,3 +29,33 @@
 define i128 @__umodti3(i128, i128) {
   ret i128 0
 }
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+; Calls to malloc and free are emitted as ordinary PTX calls.
+define void @malloc_then_free() {
+; CHECK: call.uni (retval0),
+; CHECK: malloc,
+; CHECK: (
+; CHECK: param0
+; CHECK: );
+; CHECK: ld.param.b32 %r1, [retval0+0];
+; CHECK: } // callseq 1
+; CHECK: mov.u16 %rs1, 0;
+; CHECK: st.u8 [%r1], %rs1;
+; CHECK: { // callseq 2, 0
+; CHECK: .reg .b32 temp_param_reg;
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], %r1;
+; CHECK: call.uni
+; CHECK: free,
+; CHECK: (
+; CHECK: param0
+; CHECK: );
+; CHECK: } // callseq 2
+  %a = call i8* @malloc(i64 4)
+  store i8 0, i8* %a
+  call void @free(i8* %a)
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll b/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check that malloc and free are recognized as library functions on NVPTX so
+; that an unused malloc/free pair can be removed.
+target triple = "nvptx64"
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+define void @malloc_then_free() {
+; CHECK-LABEL: @malloc_then_free(
+; CHECK-NEXT:    ret void
+;
+  %a = call i8* @malloc(i64 4)
+  store i8 0, i8* %a
+  call void @free(i8* %a)
+  ret void
+}
+