Index: llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
@@ -0,0 +1,51 @@
+//===--------- HipStdPar.h - Standard Parallelism passes --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// AcceleratorCodeSelection - Identify all functions reachable from a kernel,
+/// removing those that are unreachable.
+///
+/// AllocationInterposition - Forward calls to allocation / deallocation
+/// functions to runtime provided equivalents that allocate memory that is
+/// accessible for an accelerator.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
+#define LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+/// Module pass that keeps only the functions reachable from accelerator
+/// kernels and removes every other function from the module.
+class HipStdParAcceleratorCodeSelectionPass
+  : public PassInfoMixin<HipStdParAcceleratorCodeSelectionPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+  /// Always returns true: the pass declares itself as required.
+  static bool isRequired() { return true; }
+};
+
+/// Module pass that forwards uses of known allocation / deallocation
+/// functions to runtime provided equivalents.
+class HipStdParAllocationInterpositionPass
+  : public PassInfoMixin<HipStdParAllocationInterpositionPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+  /// Always returns true: the pass declares itself as required.
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
Index: llvm/lib/Passes/CMakeLists.txt
===================================================================
--- llvm/lib/Passes/CMakeLists.txt
+++ llvm/lib/Passes/CMakeLists.txt
@@ -19,6 +19,7 @@
   CodeGen
   Core
   Coroutines
+  HipStdPar
   IPO
   InstCombine
   IRPrinter
Index: llvm/lib/Passes/PassBuilder.cpp
=================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -94,6 +94,7 @@ #include "llvm/Transforms/Coroutines/CoroEarly.h" #include "llvm/Transforms/Coroutines/CoroElide.h" #include "llvm/Transforms/Coroutines/CoroSplit.h" +#include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -37,6 +37,7 @@ #include "llvm/Transforms/Coroutines/CoroEarly.h" #include "llvm/Transforms/Coroutines/CoroElide.h" #include "llvm/Transforms/Coroutines/CoroSplit.h" +#include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -64,6 +64,9 @@ MODULE_PASS("function-import", FunctionImportPass()) MODULE_PASS("globalopt", GlobalOptPass()) MODULE_PASS("globalsplit", GlobalSplitPass()) +MODULE_PASS("hipstdpar-select-accelerator-code", + HipStdParAcceleratorCodeSelectionPass()) +MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass()) MODULE_PASS("hotcoldsplit", HotColdSplittingPass()) MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -50,6 
+50,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Passes/PassBuilder.h"
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -348,6 +349,11 @@
     cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
     cl::Hidden);
 
+static cl::opt<bool> EnableHipStdPar(
+  "amdgpu-enable-hipstdpar",
+  cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
+  cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -699,6 +705,8 @@
         if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
           FPM.addPass(AMDGPUSimplifyLibCallsPass());
         PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+        if (EnableHipStdPar)
+          PM.addPass(HipStdParAcceleratorCodeSelectionPass());
       });
 
   PB.registerPipelineEarlySimplificationEPCallback(
Index: llvm/lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -176,6 +176,7 @@
   CodeGenTypes
   Core
   GlobalISel
+  HipStdPar
   IPO
   MC
   MIRParser
Index: llvm/lib/Transforms/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/CMakeLists.txt
+++ llvm/lib/Transforms/CMakeLists.txt
@@ -9,3 +9,4 @@
 add_subdirectory(ObjCARC)
 add_subdirectory(Coroutines)
 add_subdirectory(CFGuard)
+add_subdirectory(HipStdPar)
Index: llvm/lib/Transforms/HipStdPar/CMakeLists.txt
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/HipStdPar/CMakeLists.txt
@@ -0,0 +1,15 @@
+add_llvm_component_library(LLVMHipStdPar
+  HipStdPar.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/HipStdPar
+
+  DEPENDS
+  intrinsics_gen
+  LLVMAnalysis
+
+  LINK_COMPONENTS
+  Analysis
+  Core
+  Support
+  TransformUtils)
Index: llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
@@ -0,0 +1,372 @@
+//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file implements two passes that enable HIP C++ Standard Parallelism
+// Support:
+//
+// 1. AcceleratorCodeSelection (required): Given that only algorithms are
+//    accelerated, and that the accelerated implementation exists in the form of
+//    a compute kernel, we assume that only the kernel, and all functions
+//    reachable from it, constitute code that the user expects the accelerator
+//    to execute. Thus, we identify the set of all functions reachable from
+//    kernels, and then remove all unreachable ones. This last part is necessary
+//    because it is possible for code that the user did not expect to execute on
+//    an accelerator to contain constructs that cannot be handled by the target
+//    BE, which cannot be provably demonstrated to be dead code in general, and
+//    thus can lead to mis-compilation. The degenerate case of this is when a
+//    Module contains no kernels (the parent TU had no algorithm invocations fit
+//    for acceleration), which we handle by completely emptying said module.
+//    **NOTE**: The above does not handle indirectly reachable functions i.e.
+//              it is possible to obtain a case where the target of an indirect
+//              call is otherwise unreachable and thus is removed; this
+//              restriction is aligned with the current `-hipstdpar` limitations
+//              and will be relaxed in the future.
+//
+// 2. AllocationInterposition (required only when on-demand paging is
+//    unsupported): Some accelerators or operating systems might not support
+//    transparent on-demand paging. Thus, they would only be able to access
+//    memory that is allocated by an accelerator-aware mechanism. For such cases
+//    the user can opt into enabling allocation / deallocation interposition,
+//    whereby we replace calls to known allocation / deallocation functions with
+//    calls to runtime implemented equivalents that forward the requests to
+//    accelerator-aware interfaces. We also support freeing system allocated
+//    memory that ends up in one of the runtime equivalents, since this can
+//    happen if e.g. a library that was compiled without interposition returns
+//    an allocation that can be validly passed to `free`.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include <cassert>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+/// Replaces every use of \p ToErase with poison and then deletes \p ToErase
+/// from its parent.
+template<typename T>
+static inline void eraseFromModule(T &ToErase) {
+  ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
+  ToErase.eraseFromParent();
+}
+
+/// Checks whether the accelerator can handle the global \p G.
+///
+/// Only thread_local globals that are still used after their droppable uses
+/// have been removed are rejected; for those an error diagnostic is emitted
+/// against the function containing one instruction that (transitively) uses
+/// \p G.
+///
+/// \returns true if \p G is supported, false after emitting the diagnostic.
+static inline bool checkIfSupported(GlobalVariable &G) {
+  if (!G.isThreadLocal())
+    return true;
+
+  G.dropDroppableUses();
+
+  if (!G.isConstantUsed())
+    return true;
+
+  std::string W;
+  raw_string_ostream OS(W);
+
+  OS << "Accelerator does not support the thread_local variable "
+    << G.getName();
+
+  // Walk the user graph of G, looking through constants, until the first
+  // instruction is found; it provides the function and location to report.
+  Instruction *I = nullptr;
+  SmallVector<User *> Tmp(G.user_begin(), G.user_end());
+  SmallPtrSet<User *, 5> Visited;
+  do {
+    User *U = Tmp.pop_back_val();
+
+    if (!Visited.insert(U).second)
+      continue;
+
+    if (auto *Inst = dyn_cast<Instruction>(U))
+      I = Inst;
+    else
+      Tmp.append(U->user_begin(), U->user_end());
+  } while (!I && !Tmp.empty());
+
+  // NOTE(review): if every remaining user chain ends in something other than
+  // an instruction, I stays null and is dereferenced below in builds without
+  // assertions - confirm this cannot happen.
+  assert(I && "thread_local global should have at least one non-constant use.");
+
+  G.getContext().diagnose(
+    DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
+                              I->getDebugLoc(), DS_Error));
+
+  return false;
+}
+
+/// Erases every function, global variable, alias and ifunc from \p M.
+static inline void clearModule(Module &M) { // TODO: simplify.
+  while (!M.functions().empty())
+    eraseFromModule(*M.begin());
+  while (!M.globals().empty())
+    eraseFromModule(*M.globals().begin());
+  while (!M.aliases().empty())
+    eraseFromModule(*M.aliases().begin());
+  while (!M.ifuncs().empty())
+    eraseFromModule(*M.ifuncs().begin());
+}
+
+/// Validates the globals of \p M and relaxes the linkage of eligible ones.
+///
+/// If any global is unsupported (see checkIfSupported) the whole module is
+/// cleared. Otherwise every non-thread_local, non-constant global that has
+/// external linkage and lives in the default globals address space is given
+/// ExternalWeakLinkage and marked as externally initialized.
+static inline void maybeHandleGlobals(Module &M) {
+  unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
+  for (auto &&G : M.globals()) { // TODO: should we handle these in the FE?
+    if (!checkIfSupported(G))
+      return clearModule(M);
+
+    if (G.isThreadLocal())
+      continue;
+    if (G.isConstant())
+      continue;
+    if (G.getAddressSpace() != GlobAS)
+      continue;
+    if (G.getLinkage() != GlobalVariable::ExternalLinkage)
+      continue;
+
+    G.setLinkage(GlobalVariable::ExternalWeakLinkage);
+    G.setExternallyInitialized(true);
+  }
+}
+
+/// Deletes every non-intrinsic function of \p M that is not in \p Reachable,
+/// after first dropping such functions from the module's used lists.
+template<unsigned N>
+static inline void removeUnreachableFunctions(
+  const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
+  removeFromUsedLists(M, [&](Constant *C) {
+    if (auto F = dyn_cast<Function>(C))
+      return !Reachable.contains(F);
+
+    return false;
+  });
+
+  // Collect first, erase afterwards: erasing while iterating over M would
+  // invalidate the iteration.
+  SmallVector<std::reference_wrapper<Function>> ToRemove;
+  copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
+    return !F.isIntrinsic() && !Reachable.contains(&F);
+  });
+
+  for_each(ToRemove, eraseFromModule<Function>);
+}
+
+/// Returns true if \p F is non-null and uses the AMDGPU_KERNEL calling
+/// convention, i.e. it is an entry point for accelerator execution.
+static inline bool isAcceleratorExecutionRoot(const Function *F) {
+  return F && F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
+}
+
+/// Checks whether the accelerator can execute a call to \p F.
+///
+/// Functions whose name contains the `__hipstdpar_unsupported` marker are
+/// rejected with an error diagnostic; the text preceding the marker names the
+/// unsupported construct. \p CB is the call site, used to attribute the
+/// diagnostic to the caller; it may be null for call graph edges that have
+/// no associated call instruction, in which case \p F itself is reported.
+///
+/// \returns true if \p F is supported, false after emitting the diagnostic.
+static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
+  const auto Dx = F->getName().rfind("__hipstdpar_unsupported");
+
+  if (Dx == StringRef::npos)
+    return true;
+
+  const auto N = F->getName().substr(0, Dx);
+
+  std::string W;
+  raw_string_ostream OS(W);
+
+  // The ASM text is only available through the call site's first argument.
+  if (N == "__ASM" && CB)
+    OS << "Accelerator does not support the ASM block:\n"
+      << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
+  else
+    OS << "Accelerator does not support the " << N << " function.";
+
+  if (!CB) {
+    F->getContext().diagnose(
+      DiagnosticInfoUnsupported(*F, W, DiagnosticLocation(), DS_Error));
+    return false;
+  }
+
+  auto Caller = CB->getParent()->getParent();
+
+  Caller->getContext().diagnose(
+    DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
+
+  return false;
+}
+
+/// Computes the set of functions reachable from accelerator kernels through
+/// direct call graph edges and removes everything else from \p M.
+///
+/// If a reachable function is unsupported the pass stops right after the
+/// error diagnostic has been emitted. A module without kernels is emptied.
+PreservedAnalyses
+  HipStdParAcceleratorCodeSelectionPass::run(Module &M,
+                                             ModuleAnalysisManager &MAM) {
+  auto &CGA = MAM.getResult<CallGraphAnalysis>(M);
+
+  SmallPtrSet<const Function *, 32> Reachable;
+  for (auto &&CGN : CGA) {
+    if (!isAcceleratorExecutionRoot(CGN.first))
+      continue;
+
+    Reachable.insert(CGN.first);
+
+    // Worklist based traversal of the call graph, starting at the kernel.
+    SmallVector<const Function *> Tmp({CGN.first});
+    do {
+      const Function *F = Tmp.pop_back_val();
+
+      for (auto &&N : *CGA[F]) {
+        if (!N.second)
+          continue;
+
+        const Function *Callee = N.second->getFunction();
+        if (!Callee || Reachable.contains(Callee))
+          continue;
+
+        // Edges that do not stem from a call instruction carry no call site,
+        // so the optional handle must be checked before it is dereferenced.
+        Value *Site = N.first ? static_cast<Value *>(*N.first) : nullptr;
+        if (!checkIfSupported(Callee, dyn_cast_or_null<CallBase>(Site)))
+          return PreservedAnalyses::none();
+
+        Reachable.insert(Callee);
+        Tmp.push_back(Callee);
+      }
+    } while (!std::empty(Tmp));
+  }
+
+  if (std::empty(Reachable))
+    clearModule(M);
+  else
+    removeUnreachableFunctions(Reachable, M);
+
+  maybeHandleGlobals(M);
+
+  return PreservedAnalyses::none();
+}
+
+/// Maps the names of the known allocation / deallocation functions to the
+/// names of the runtime provided functions that replace them.
+static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
+  {"aligned_alloc",             "__hipstdpar_aligned_alloc"},
+  {"calloc",                    "__hipstdpar_calloc"},
+  {"free",                      "__hipstdpar_free"},
+  {"malloc",                    "__hipstdpar_malloc"},
+  {"memalign",                  "__hipstdpar_aligned_alloc"},
+  {"posix_memalign",            "__hipstdpar_posix_aligned_alloc"},
+  {"realloc",                   "__hipstdpar_realloc"},
+  {"reallocarray",              "__hipstdpar_realloc_array"},
+  {"_ZdaPv",                    "__hipstdpar_operator_delete"},
+  {"_ZdaPvm",                   "__hipstdpar_operator_delete_sized"},
+  {"_ZdaPvSt11align_val_t",     "__hipstdpar_operator_delete_aligned"},
+  {"_ZdaPvmSt11align_val_t",    "__hipstdpar_operator_delete_aligned_sized"},
+  {"_ZdlPv",                    "__hipstdpar_operator_delete"},
+  {"_ZdlPvm",                   "__hipstdpar_operator_delete_sized"},
+  {"_ZdlPvSt11align_val_t",     "__hipstdpar_operator_delete_aligned"},
+  {"_ZdlPvmSt11align_val_t",    "__hipstdpar_operator_delete_aligned_sized"},
+  {"_Znam",                     "__hipstdpar_operator_new"},
+  {"_ZnamRKSt9nothrow_t",       "__hipstdpar_operator_new_nothrow"},
+  {"_ZnamSt11align_val_t",      "__hipstdpar_operator_new_aligned"},
+  {"_ZnamSt11align_val_tRKSt9nothrow_t",
+                                "__hipstdpar_operator_new_aligned_nothrow"},
+
+  {"_Znwm",                     "__hipstdpar_operator_new"},
+  {"_ZnwmRKSt9nothrow_t",       "__hipstdpar_operator_new_nothrow"},
+  {"_ZnwmSt11align_val_t",      "__hipstdpar_operator_new_aligned"},
+  {"_ZnwmSt11align_val_tRKSt9nothrow_t",
+                                "__hipstdpar_operator_new_aligned_nothrow"},
+  {"__builtin_calloc",          "__hipstdpar_calloc"},
+  {"__builtin_free",            "__hipstdpar_free"},
+  {"__builtin_malloc",          "__hipstdpar_malloc"},
+  {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
+  {"__builtin_operator_new",    "__hipstdpar_operator_new"},
+  {"__builtin_realloc",         "__hipstdpar_realloc"},
+  {"__libc_calloc",             "__hipstdpar_calloc"},
+  {"__libc_free",               "__hipstdpar_free"},
+  {"__libc_malloc",             "__hipstdpar_malloc"},
+  {"__libc_memalign",           "__hipstdpar_aligned_alloc"},
+  {"__libc_realloc",            "__hipstdpar_realloc"}
+};
+
+/// Redirects every use of a function listed in ReplaceMap to its runtime
+/// provided replacement, when that replacement is declared in \p M; a warning
+/// is emitted otherwise. Finally, uses of `__hipstdpar_hidden_free` are
+/// forwarded to `__libc_free` and the former is erased.
+PreservedAnalyses
+HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
+  SmallDenseMap<StringRef, StringRef> AllocReplacements(
+    std::cbegin(ReplaceMap), std::cend(ReplaceMap));
+
+  for (auto &&F : M) {
+    if (!F.hasName())
+      continue;
+
+    // A single lookup yields both the membership test and the replacement.
+    const auto It = AllocReplacements.find(F.getName());
+    if (It == AllocReplacements.end())
+      continue;
+
+    if (auto R = M.getFunction(It->second)) {
+      F.replaceAllUsesWith(R);
+    } else {
+      std::string W;
+      raw_string_ostream OS(W);
+
+      OS << "cannot be interposed, missing: " << It->second
+        << ". Tried to run the allocation interposition pass without the "
+        << "replacement functions available.";
+
+      F.getContext().diagnose(DiagnosticInfoUnsupported(F, W,
+                                                        F.getSubprogram(),
+                                                        DS_Warning));
+    }
+  }
+
+  // This runs after the loop above, so the `__libc_free` uses introduced here
+  // are not themselves redirected to `__hipstdpar_free`.
+  if (auto F = M.getFunction("__hipstdpar_hidden_free")) {
+    auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(),
+                                          F->getAttributes());
+    F->replaceAllUsesWith(LibcFree.getCallee());
+
+    eraseFromModule(*F);
+  }
+
+  return PreservedAnalyses::none();
+}
Index: llvm/test/Transforms/HipStdPar/accelerator-code-selection.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HipStdPar/accelerator-code-selection.ll
@@ -0,0 +1,116 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
+; RUN: %s | FileCheck %s
+
+$_ZNK8CallableclEPi = comdat any
+$_ZNK8CallableclEPf = comdat any
+$_ZNK8Callable6mem_fnEPKi = comdat any
+$_ZN8Callable13static_mem_fnEPKi = comdat any
+; CHECK-NOT: $_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf
+$_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf = comdat any
+; CHECK-NOT: $_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf
+$_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf = comdat any
+
+%struct.Callable = type { [64 x i8] }
+
+; CHECK-NOT: @should_be_removed
+@llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr @should_be_removed], section "llvm.metadata"
+
+define void @should_be_removed(ptr %p) {
+  ret void
+}
+
+declare void @llvm.trap()
+
+; CHECK: define {{.*}} @called_via_chain
+define void @called_via_chain(ptr %p) {
+  entry:
+    %tobool.not = icmp eq ptr %p, null
+    br i1 %tobool.not, label %if.then, label %if.end
+
+  if.then:
+    tail call void @llvm.trap()
+    unreachable
+
+  if.end:
+    ret void
+}
+
+; CHECK: define {{.*}} @directly_called
+define void @directly_called(ptr %p) {
+  tail call void @called_via_chain(ptr %p)
+  ret void
+}
+
+; CHECK: define {{.*}}
amdgpu_kernel {{.*}} @accelerator_execution_root +define hidden amdgpu_kernel void @accelerator_execution_root(ptr %p) { + tail call void @directly_called(ptr %p) + ret void +} + +; CHECK-NOT: @defined_elsewhere_should_be_removed +declare void @defined_elsewhere_should_be_removed(ptr) + +; CHECK: declare {{.*}} @defined_elsewhere_directly_called +declare void @defined_elsewhere_directly_called(ptr) + +; CHECK: define {{.*}} amdgpu_kernel {{.*}} @another_accelerator_execution_root +define hidden amdgpu_kernel void @another_accelerator_execution_root(ptr %p) { + tail call void @defined_elsewhere_directly_called(ptr %p) + ret void +} + +; Also test passing a callable object (functor / lambda) to a kernel, which is +; the common pattern for customising algorithms. + +; CHECK: define {{.*}} amdgpu_kernel {{.*}} @_Z22accelerator_execution_root_taking_callablePi8Callable +define hidden amdgpu_kernel void @_Z22accelerator_execution_root_taking_callablePi8Callable(ptr noundef %p, ptr addrspace(4) nocapture readonly byref(%struct.Callable) align 8 %callable) { + %callable_in_generic = addrspacecast ptr addrspace(4) %callable to ptr + call void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %callable_in_generic, ptr noundef %p) + + ret void +} + +; CHECK: define {{.*}} @_ZNK8CallableclEPi +define linkonce_odr dso_local void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) { + call void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p) + + ret void +} + +; CHECK: define {{.*}} @_ZNK8Callable6mem_fnEPKi +define linkonce_odr dso_local void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p) { + call void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p) + + ret void +} + +; CHECK: define {{.*}} @_ZN8Callable13static_mem_fnEPKi +define linkonce_odr dso_local void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p) { + ret void +} + 
+; CHECK-NOT: define {{.*}} @_Z26non_kernel_taking_callablePf8Callable +define dso_local void @_Z26non_kernel_taking_callablePf8Callable(ptr noundef %p, ptr noundef byval(%struct.Callable) align 8 %callable) { + call void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %callable, ptr noundef %p) + + ret void +} + +; CHECK-NOT: define {{.*}} @_ZNK8CallableclEPf +define linkonce_odr dso_local void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) { + call void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) + + ret void +} + +; CHECK-NOT: @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf +define linkonce_odr dso_local void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) { + call void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p) + + ret void +} + +; CHECK-NOT: @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf +define linkonce_odr dso_local void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p) { + ret void +} \ No newline at end of file Index: llvm/test/Transforms/HipStdPar/allocation-interposition.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/HipStdPar/allocation-interposition.ll @@ -0,0 +1,221 @@ +; RUN: opt -S -passes=hipstdpar-interpose-alloc %s | FileCheck %s + +%"struct.std::nothrow_t" = type { i8 } + +@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1 + +declare ptr @__hipstdpar_aligned_alloc(i64, i64) + +declare ptr @__hipstdpar_malloc(i64) + +declare ptr @__hipstdpar_calloc(i64, i64) + +declare i32 @__hipstdpar_posix_aligned_alloc(ptr, i64, i64) + +declare void @__hipstdpar_hidden_free(ptr) + +declare ptr @__hipstdpar_realloc(ptr, i64) + +declare ptr 
@__hipstdpar_realloc_array(ptr, i64, i64) + +declare void @__hipstdpar_free(ptr) + +declare ptr @__hipstdpar_operator_new_aligned(i64, i64) + +declare ptr @__hipstdpar_operator_new(i64) + +declare ptr @__hipstdpar_operator_new_nothrow(i64, %"struct.std::nothrow_t") + +declare ptr @__hipstdpar_operator_new_aligned_nothrow(i64, i64, %"struct.std::nothrow_t") + +declare void @__hipstdpar_operator_delete_aligned_sized(ptr, i64, i64) + +declare void @__hipstdpar_operator_delete(ptr) + +declare void @__hipstdpar_operator_delete_aligned(ptr, i64) + +declare void @__hipstdpar_operator_delete_sized(ptr, i64) + +define dso_local noundef i32 @allocs() { + ; CHECK: %1 = call noalias align 8 ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42) + %1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %1) + call void @free(ptr noundef %1) + + ; CHECK: %2 = call noalias ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42) + %2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %2) + call void @free(ptr noundef %2) + + ; CHECK: %3 = call noalias ptr @__hipstdpar_malloc(i64 noundef 42) + %3 = call noalias ptr @malloc(i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %3) + call void @free(ptr noundef %3) + + ; CHECK: %4 = call noalias align 8 ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42) + %4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %4) + call void @free(ptr noundef %4) + + %tmp = alloca ptr, align 8 + ; CHECK: %5 = call i32 @__hipstdpar_posix_aligned_alloc(ptr noundef %tmp, i64 noundef 8, i64 noundef 42) + %5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %tmp) + call void @free(ptr noundef %tmp) + + ; CHECK: %6 = call noalias ptr @__hipstdpar_malloc(i64 
noundef 42) + %6 = call noalias ptr @malloc(i64 noundef 42) + ; CHECK: %7 = call ptr @__hipstdpar_realloc(ptr noundef %6, i64 noundef 42) + %7 = call ptr @realloc(ptr noundef %6, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %7) + call void @free(ptr noundef %7) + + ; CHECK: %8 = call noalias ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42) + %8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: %9 = call ptr @__hipstdpar_realloc_array(ptr noundef %8, i64 noundef 1, i64 noundef 42) + %9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %9) + call void @free(ptr noundef %9) + + ; CHECK: %10 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 1) + %10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1) + ; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %10) + call void @_ZdlPv(ptr noundef %10) + + ; CHECK: %11 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 1, i64 noundef 8) + %11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8) + ; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %11, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8) + + ; CHECK: %12 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %12) + call void @_ZdlPv(ptr noundef %12) + + ; CHECK: %13 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 
noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %13, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8) + + ; CHECK: %14 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 42) + %14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42) + ; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %14) + call void @_ZdaPv(ptr noundef %14) + + ; CHECK: %15 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8) + %15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8) + ; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %15, i64 noundef 8) + call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8) + + ; CHECK: %16 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %16) + call void @_ZdaPv(ptr noundef %16) + + ; CHECK: %17 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %17, i64 noundef 8) + call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8) + + ; CHECK: %18 = call ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42) + %18 = call ptr @calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %18) + call void 
@free(ptr noundef %18) + + ; CHECK: %19 = call ptr @__hipstdpar_malloc(i64 noundef 42) + %19 = call ptr @malloc(i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %19) + call void @free(ptr noundef %19) + + ; CHECK: %20 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 42) + %20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42) + ; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %20) + call void @_ZdlPv(ptr noundef %20) + + ; CHECK: %21 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8) + %21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8) + ; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %21, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8) + + ; CHECK: %22 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %22) + call void @_ZdlPv(ptr noundef %22) + + ; CHECK: %23 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %23, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8) + + ; CHECK: %24 = call ptr @__hipstdpar_malloc(i64 noundef 42) + %24 = call ptr @malloc(i64 noundef 42) + ; CHECK: %25 = call ptr @__hipstdpar_realloc(ptr noundef %24, i64 noundef 41) + %25 = call ptr @realloc(ptr noundef %24, 
i64 noundef 41) + ; CHECK: call void @__hipstdpar_free(ptr noundef %25) + call void @free(ptr noundef %25) + + ; CHECK: %26 = call ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42) + %26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %26) + call void @__libc_free(ptr noundef %26) + + ; CHECK: %27 = call ptr @__hipstdpar_malloc(i64 noundef 42) + %27 = call ptr @__libc_malloc(i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %27) + call void @__libc_free(ptr noundef %27) + + ; CHECK: %28 = call ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42) + %28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__hipstdpar_free(ptr noundef %28) + call void @__libc_free(ptr noundef %28) + + ret i32 0 +} + +declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef) + +declare void @free(ptr noundef) + +declare noalias ptr @calloc(i64 noundef, i64 noundef) + +declare noalias ptr @malloc(i64 noundef) + +declare noalias ptr @memalign(i64 noundef, i64 noundef) + +declare i32 @posix_memalign(ptr noundef, i64 noundef, i64 noundef) + +declare ptr @realloc(ptr noundef, i64 noundef) + +declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef) + +declare noundef nonnull ptr @_Znwm(i64 noundef) + +declare void @_ZdlPv(ptr noundef) + +declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef) + +declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef) + +declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) + +declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) + +declare noundef nonnull ptr @_Znam(i64 noundef) + +declare void @_ZdaPv(ptr noundef) + +declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef) + +declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 
noundef)
+
+declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) ; operator new[](unsigned long, std::nothrow_t const&)
+
+declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) ; operator new[](unsigned long, std::align_val_t, std::nothrow_t const&)
+
+declare ptr @__libc_calloc(i64 noundef, i64 noundef)
+
+declare void @__libc_free(ptr noundef)
+
+declare ptr @__libc_malloc(i64 noundef)
+
+declare ptr @__libc_memalign(i64 noundef, i64 noundef)
Index: llvm/test/Transforms/HipStdPar/allocation-no-interposition.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HipStdPar/allocation-no-interposition.ll
@@ -0,0 +1,161 @@
+; RUN: opt < %s -passes=hipstdpar-interpose-alloc -S 2>&1 | FileCheck %s
+
+; CHECK: warning: {{.*}} aligned_alloc {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} free {{.*}} cannot be interposed, missing: __hipstdpar_free. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} calloc {{.*}} cannot be interposed, missing: __hipstdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} memalign {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} posix_memalign {{.*}} cannot be interposed, missing: __hipstdpar_posix_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} realloc {{.*}} cannot be interposed, missing: __hipstdpar_realloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} reallocarray {{.*}} cannot be interposed, missing: __hipstdpar_realloc_array. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _Znwm {{.*}} cannot be interposed, missing: __hipstdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZdlPv {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZnwmSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZdlPvSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZnwmRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZnwmSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _Znam {{.*}} cannot be interposed, missing: __hipstdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZdaPv {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete. 
Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZnamSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZdaPvSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZnamRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZnamSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} __libc_calloc {{.*}} cannot be interposed, missing: __hipstdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} __libc_free {{.*}} cannot be interposed, missing: __hipstdpar_free. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} __libc_malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} __libc_memalign {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available. 
+
+%"struct.std::nothrow_t" = type { i8 }
+
+@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1 ; std::nothrow
+
+define dso_local noundef i32 @allocs() { ; exercises the allocation / deallocation entry points named in the expected warnings above
+  %1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42)
+  call void @free(ptr noundef %1)
+
+  %2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
+  call void @free(ptr noundef %2)
+
+  %3 = call noalias ptr @malloc(i64 noundef 42)
+  call void @free(ptr noundef %3)
+
+  %4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42)
+  call void @free(ptr noundef %4)
+
+  %tmp = alloca ptr, align 8 ; stack slot passed as posix_memalign's output pointer
+  %5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
+  call void @free(ptr noundef %tmp) ; NOTE(review): passes the slot %tmp itself, not a pointer loaded from it - presumably irrelevant because only the callees matter to this test; confirm
+
+  %6 = call noalias ptr @malloc(i64 noundef 42)
+  %7 = call ptr @realloc(ptr noundef %6, i64 noundef 42)
+  call void @free(ptr noundef %7)
+
+  %8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
+  %9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42)
+  call void @free(ptr noundef %9)
+
+  %10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1) ; operator new(unsigned long)
+  call void @_ZdlPv(ptr noundef %10) ; operator delete(void*)
+
+  %11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8) ; operator new(unsigned long, std::align_val_t)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8) ; operator delete(void*, std::align_val_t)
+
+  %12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) ; operator new(unsigned long, std::nothrow_t const&)
+  call void @_ZdlPv(ptr noundef %12)
+
+  %13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) ; operator new(unsigned long, std::align_val_t, std::nothrow_t const&)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8)
+
+  %14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42) ; operator new[](unsigned long)
+  call void @_ZdaPv(ptr noundef %14) ; operator delete[](void*)
+
+  %15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8) ; operator new[](unsigned long, std::align_val_t)
+  call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8) ; operator delete[](void*, std::align_val_t)
+
+  %16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) ; operator new[](unsigned long, std::nothrow_t const&)
+  call void @_ZdaPv(ptr noundef %16)
+
+  %17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) ; operator new[](unsigned long, std::align_val_t, std::nothrow_t const&)
+  call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8)
+
+  %18 = call ptr @calloc(i64 noundef 1, i64 noundef 42) ; calloc again, this time without noalias on the call
+  call void @free(ptr noundef %18)
+
+  %19 = call ptr @malloc(i64 noundef 42) ; malloc again, this time without noalias on the call
+  call void @free(ptr noundef %19)
+
+  %20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42) ; %20-%23 repeat the four operator new forms of %10-%13 with size 42 instead of 1
+  call void @_ZdlPv(ptr noundef %20)
+
+  %21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8)
+
+  %22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdlPv(ptr noundef %22)
+
+  %23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8)
+
+  %24 = call ptr @malloc(i64 noundef 42)
+  %25 = call ptr @realloc(ptr noundef %24, i64 noundef 41) ; realloc to a different size (42 -> 41)
+  call void @free(ptr noundef %25)
+
+  %26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42) ; %26-%28 use the __libc_-prefixed entry points
+  call void @__libc_free(ptr noundef %26)
+
+  %27 = call ptr @__libc_malloc(i64 noundef 42)
+  call void @__libc_free(ptr noundef %27)
+
+  %28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42)
+  call void @__libc_free(ptr noundef %28)
+
+  ret i32 0
+}
+
+declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef)
+
+declare void @free(ptr noundef)
+
+declare noalias ptr @calloc(i64 noundef, i64 noundef)
+
+declare noalias ptr @malloc(i64 noundef)
+
+declare noalias ptr @memalign(i64 noundef, i64 noundef)
+
+declare i32
@posix_memalign(ptr noundef, i64 noundef, i64 noundef)
+
+declare ptr @realloc(ptr noundef, i64 noundef)
+
+declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef)
+
+declare noundef nonnull ptr @_Znwm(i64 noundef) ; operator new(unsigned long)
+
+declare void @_ZdlPv(ptr noundef) ; operator delete(void*)
+
+declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef) ; operator new(unsigned long, std::align_val_t)
+
+declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef) ; operator delete(void*, std::align_val_t)
+
+declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) ; operator new(unsigned long, std::nothrow_t const&)
+
+declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) ; operator new(unsigned long, std::align_val_t, std::nothrow_t const&)
+
+declare noundef nonnull ptr @_Znam(i64 noundef) ; operator new[](unsigned long)
+
+declare void @_ZdaPv(ptr noundef) ; operator delete[](void*)
+
+declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef) ; operator new[](unsigned long, std::align_val_t)
+
+declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 noundef) ; operator delete[](void*, std::align_val_t)
+
+declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) ; operator new[](unsigned long, std::nothrow_t const&)
+
+declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) ; operator new[](unsigned long, std::align_val_t, std::nothrow_t const&)
+
+declare ptr @__libc_calloc(i64 noundef, i64 noundef)
+
+declare void @__libc_free(ptr noundef)
+
+declare ptr @__libc_malloc(i64 noundef)
+
+declare ptr @__libc_memalign(i64 noundef, i64 noundef)
Index: llvm/test/Transforms/HipStdPar/unsupported-asm.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HipStdPar/unsupported-asm.ll
@@ -0,0 +1,12 @@
+; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
+; RUN: %s 2>&1 | FileCheck %s
+
+; CHECK: error: {{.*}} in function foo void (): Accelerator does not support the ASM block:
+; CHECK-NEXT: {{.*}}Invalid ASM block{{.*}}
+define amdgpu_kernel void @foo() {
+entry:
+  call void @__ASM__hipstdpar_unsupported([18 x i8] c"Invalid ASM block\00") ; placeholder callee; its string argument is the text the expected diagnostic echoes
+  ret void
+}
+
+declare void @__ASM__hipstdpar_unsupported([18 x i8])
Index: llvm/test/Transforms/HipStdPar/unsupported-builtins.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HipStdPar/unsupported-builtins.ll
@@ -0,0 +1,11 @@
+; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
+; RUN: %s 2>&1 | FileCheck %s
+
+; CHECK: error: {{.*}} in function foo void (): Accelerator does not support the __builtin_ia32_pause function
+define amdgpu_kernel void @foo() {
+entry:
+  call void @__builtin_ia32_pause__hipstdpar_unsupported() ; placeholder callee; the expected diagnostic above names __builtin_ia32_pause
+  ret void
+}
+
+declare void @__builtin_ia32_pause__hipstdpar_unsupported()
Index: llvm/test/Transforms/HipStdPar/unsupported-thread-local-direct-use.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HipStdPar/unsupported-thread-local-direct-use.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
+; RUN: %s 2>&1 | FileCheck %s
+
+@tls = hidden thread_local addrspace(1) global i32 0, align 4
+
+; CHECK: error: {{.*}} in function direct_use void (): Accelerator does not support the thread_local variable tls
+define amdgpu_kernel void @direct_use() {
+entry:
+  %0 = call align 4 ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @tls) ; @tls is passed to the intrinsic directly, in addrspace(1)
+  %1 = load i32, ptr addrspace(1) %0, align 4 ; load through the returned pointer; the result is unused
+  ret void
+}
+
+declare nonnull ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) nonnull)
Index: llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
+; RUN: %s 2>&1 | FileCheck %s
+
+@tls = hidden thread_local
addrspace(1) global i32 0, align 4
+
+; CHECK: error: {{.*}} in function indirect_use void (): Accelerator does not support the thread_local variable tls
+define amdgpu_kernel void @indirect_use() { ; kernel named by the expected diagnostic above
+entry:
+  %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr addrspacecast (ptr addrspace(1) @tls to ptr)) ; @tls reaches the intrinsic through an addrspacecast constant expression rather than directly
+  %1 = load i32, ptr %0, align 4 ; load through the returned pointer; the result is unused
+  ret void
+}
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)