Index: llvm/include/llvm/Transforms/StdPar/StdPar.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/StdPar/StdPar.h @@ -0,0 +1,46 @@ +//===- StdPar.h - Standard Parallelism passes -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// AcceleratorCodeSelection - Identify all functions reachable from a kernel, +/// removing those that are unreachable. +/// +/// AllocationInterposition - Forward calls to allocation / deallocation +/// functions to runtime provided equivalents that allocate memory that is +/// accessible for an accelerator. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_STDPAR_STDPAR_H +#define LLVM_TRANSFORMS_STDPAR_STDPAR_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; +class ModuleAnaysisManager; // NOTE(review): misspelled? Confirm or remove. + +class StdParAcceleratorCodeSelectionPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + + static bool isRequired() { return true; } +}; + +class StdParAllocationInterpositionPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + + static bool isRequired() { return true; } +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_STDPAR_STDPAR_H Index: llvm/lib/Passes/CMakeLists.txt =================================================================== --- llvm/lib/Passes/CMakeLists.txt +++ llvm/lib/Passes/CMakeLists.txt @@ -24,6 +24,7 @@ IRPrinter ObjCARC Scalar + StdPar Support Target TransformUtils Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- 
llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -220,6 +220,7 @@ #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" +#include "llvm/Transforms/StdPar/StdPar.h" #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" #include "llvm/Transforms/Scalar/StructurizeCFG.h" #include "llvm/Transforms/Scalar/TLSVariableHoist.h" Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -118,6 +118,7 @@ #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" +#include "llvm/Transforms/StdPar/StdPar.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -107,6 +107,9 @@ MODULE_PASS("sample-profile", SampleProfileLoaderPass()) MODULE_PASS("scc-oz-module-inliner", buildInlinerPipeline(OptimizationLevel::Oz, ThinOrFullLTOPhase::None)) +MODULE_PASS("stdpar-select-accelerator-code", + StdParAcceleratorCodeSelectionPass()) +MODULE_PASS("stdpar-interpose-alloc", StdParAllocationInterpositionPass()) MODULE_PASS("strip", StripSymbolsPass()) MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass()) MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM)) Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -57,6 +57,7 @@ #include 
"llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" +#include "llvm/Transforms/StdPar/StdPar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" @@ -349,6 +350,11 @@ cl::desc("Enable rewrite partial reg uses pass"), cl::init(false), cl::Hidden); +static cl::opt EnableStdPar( + "amdgpu-enable-stdpar", + cl::desc("Enable Standard Parallelism Offload support"), cl::init(false), + cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(getTheR600Target()); @@ -687,6 +693,8 @@ if (EnableLibCallSimplify && Level != OptimizationLevel::O0) FPM.addPass(AMDGPUSimplifyLibCallsPass()); PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + if (EnableStdPar) + PM.addPass(StdParAcceleratorCodeSelectionPass()); }); PB.registerPipelineEarlySimplificationEPCallback( Index: llvm/lib/Transforms/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/CMakeLists.txt +++ llvm/lib/Transforms/CMakeLists.txt @@ -9,3 +9,4 @@ add_subdirectory(ObjCARC) add_subdirectory(Coroutines) add_subdirectory(CFGuard) +add_subdirectory(StdPar) Index: llvm/lib/Transforms/StdPar/CMakeLists.txt =================================================================== --- /dev/null +++ llvm/lib/Transforms/StdPar/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_component_library(LLVMStdPar + StdPar.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/StdPar + + DEPENDS + intrinsics_gen + + LINK_COMPONENTS + Core + Support + TransformUtils) Index: llvm/lib/Transforms/StdPar/StdPar.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/StdPar/StdPar.cpp @@ -0,0 +1,267 @@ +//===------ StdPar.cpp - C++ Standard Parallelism Support Passes 
----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements two passes that enable C++ Standard Parallelism Support: +// +// 1. AcceleratorCodeSelection (required): Given that only algorithms are +// accelerated, and that the accelerated implementation exists in the form of +// a compute kernel, we assume that only the kernel, and all functions +// reachable from it, constitute code that the user expects the accelerator +// to execute. Thus, we identify the set of all functions reachable from +// kernels, and then remove all unreachable ones. This last part is necessary +// because it is possible for code that the user did not expect to execute on +// an accelerator to contain constructs that cannot be handled by the target +// BE, which cannot be provably demonstrated to be dead code in general, and +// thus can lead to mis-compilation. The degenerate case of this is when a +// Module contains no kernels (the parent TU had no algorithm invocations fit +// for acceleration), which we handle by completely emptying said module. +// **NOTE**: The above does not handle indirectly reachable functions i.e. +// it is possible to obtain a case where the target of an indirect +// call is otherwise unreachable and thus is removed; this +// restriction is aligned with the current `-stdpar` limitations +// and will be relaxed in the future. +// +// 2. AllocationInterposition (required only when on-demand paging is +// unsupported): Some accelerators or operating systems might not support +// transparent on-demand paging. Thus, they would only be able to access +// memory that is allocated by an accelerator-aware mechanism. 
For such cases +// the user can opt into enabling allocation / deallocation interposition, +// whereby we replace calls to known allocation / deallocation functions with +// calls to runtime implemented equivalents that forward the requests to +// accelerator-aware interfaces. We also support freeing system allocated +// memory that ends up in one of the runtime equivalents, since this can +// happen if e.g. a library that was compiled without interposition returns +// an allocation that can be validly passed to `free`. +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/StdPar/StdPar.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#include +#include + +using namespace llvm; + +template +static inline void eraseFromModule(T &toErase) { /* Replaces all uses of toErase with poison of its type, then erases it from its parent. */ + toErase.replaceAllUsesWith(PoisonValue::get(toErase.getType())); + toErase.eraseFromParent(); +} + +static inline void maybeHandleGlobals(Module &M) { /* Switches non-constant ExternalLinkage globals in the default globals address space to ExternalWeakLinkage and marks them externally initialized. */ + unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace(); + for (auto &&G : M.globals()) { // TODO: should we handle these in the FE? + if (G.isConstant()) + continue; + if (G.getAddressSpace() != GlobAS) + continue; + if (G.getLinkage() != GlobalVariable::ExternalLinkage) + continue; + + G.setLinkage(GlobalVariable::ExternalWeakLinkage); + G.setExternallyInitialized(true); + } +} + +static inline void clearModule(Module &M) { // Erases every function, global, alias and ifunc in M. TODO: simplify. 
+ while (!M.functions().empty()) + eraseFromModule(*M.begin()); + while (!M.globals().empty()) + eraseFromModule(*M.globals().begin()); + while (!M.aliases().empty()) + eraseFromModule(*M.aliases().begin()); + while (!M.ifuncs().empty()) + eraseFromModule(*M.ifuncs().begin()); +} + +template +static inline void removeUnreachableFunctions( + const SmallPtrSet& Reachable, Module &M) { /* Removes functions missing from Reachable from the used lists, then erases every non-intrinsic function that is not in Reachable. */ + removeFromUsedLists(M, [&](Constant *C) { + if (auto F = dyn_cast(C)) + return !Reachable.contains(F); + + return false; + }); + + SmallVector> ToRemove; + copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) { + return !F.isIntrinsic() && !Reachable.contains(&F); + }); + + for_each(ToRemove, eraseFromModule); +} + +static inline bool IsAcceleratorExecutionRoot(const Function *F) { /* True iff F is non-null and has one of the kernel calling conventions listed in the switch below. */ + if (!F) + return false; + + // As support for additional accelerator stacks is added, this switch should + // be extended to include the corresponding calling conventions. + switch (F->getCallingConv()) { + case CallingConv::PTX_Kernel: + return true; + case CallingConv::SPIR_KERNEL: + return true; + case CallingConv::AMDGPU_KERNEL: + return true; + default: + return false; + } +} + +static inline void CheckIfSupported(const Function *F) { /* No-op unless F's name contains "__stdpar_unsupported"; otherwise reports a DS_Error naming the part of F's name before the last such marker, attributed to the first user accepted by the cast below. 
*/ + const auto Dx = F->getName().rfind("__stdpar_unsupported"); + + if (Dx == StringRef::npos) + return; + + const auto N = F->getName().substr(0, Dx); + + std::string W; + raw_string_ostream OS(W); + + OS << "Accelerator does not support the " << N << " function."; + + for (auto &&U : F->users()) + if (auto I = dyn_cast_or_null(U)) { + auto Caller = I->getParent()->getParent(); + + return Caller->getContext().diagnose( + DiagnosticInfoUnsupported(*Caller, W, I->getDebugLoc(), DS_Error)); + } +} + +PreservedAnalyses +StdParAcceleratorCodeSelectionPass::run(Module &M, ModuleAnalysisManager &MAM) { /* Marks as reachable every execution root and every function reached from one by walking CGA with a worklist, running CheckIfSupported on each newly reached callee; clears the module when nothing is reachable, otherwise removes the unreachable functions, then calls maybeHandleGlobals. Preserves no analyses. */ + auto &CGA = MAM.getResult(M); + + SmallPtrSet Reachable; + for (auto &&CGN : CGA) { + if (!IsAcceleratorExecutionRoot(CGN.first)) + continue; + + Reachable.insert(CGN.first); 
+ + SmallVector Tmp({CGN.first}); + do { + auto F = std::move(Tmp.back()); + Tmp.pop_back(); + + for (auto &&N : *CGA[F]) { + if (!N.second) + continue; + if (!N.second->getFunction()) + continue; + if (Reachable.contains(N.second->getFunction())) + continue; + + CheckIfSupported(N.second->getFunction()); + + Reachable.insert(N.second->getFunction()); + Tmp.push_back(N.second->getFunction()); + } + } while (!std::empty(Tmp)); + } + + if (std::empty(Reachable)) + clearModule(M); + else + removeUnreachableFunctions(Reachable, M); + + maybeHandleGlobals(M); + + return PreservedAnalyses::none(); +} + +static constexpr std::pair ReplaceMap[]{ /* Each entry: {symbol to interpose, replacement symbol}. */ + {"aligned_alloc", "__stdpar_aligned_alloc"}, + {"calloc", "__stdpar_calloc"}, + {"free", "__stdpar_free"}, + {"malloc", "__stdpar_malloc"}, + {"memalign", "__stdpar_aligned_alloc"}, + {"posix_memalign", "__stdpar_posix_aligned_alloc"}, + {"realloc", "__stdpar_realloc"}, + {"reallocarray", "__stdpar_realloc_array"}, + {"_ZdaPv", "__stdpar_operator_delete"}, + {"_ZdaPvm", "__stdpar_operator_delete_sized"}, + {"_ZdaPvSt11align_val_t", "__stdpar_operator_delete_aligned"}, + {"_ZdaPvmSt11align_val_t", "__stdpar_operator_delete_aligned_sized"}, + {"_ZdlPv", "__stdpar_operator_delete"}, + {"_ZdlPvm", "__stdpar_operator_delete_sized"}, + {"_ZdlPvSt11align_val_t", "__stdpar_operator_delete_aligned"}, + {"_ZdlPvmSt11align_val_t", "__stdpar_operator_delete_aligned_sized"}, + {"_Znam", "__stdpar_operator_new"}, + {"_ZnamRKSt9nothrow_t", "__stdpar_operator_new_nothrow"}, + {"_ZnamSt11align_val_t", "__stdpar_operator_new_aligned"}, + {"_ZnamSt11align_val_tRKSt9nothrow_t", + "__stdpar_operator_new_aligned_nothrow"}, + + {"_Znwm", "__stdpar_operator_new"}, + {"_ZnwmRKSt9nothrow_t", "__stdpar_operator_new_nothrow"}, + {"_ZnwmSt11align_val_t", "__stdpar_operator_new_aligned"}, + {"_ZnwmSt11align_val_tRKSt9nothrow_t", + "__stdpar_operator_new_aligned_nothrow"}, + {"__builtin_calloc", 
"__stdpar_calloc"}, + {"__builtin_free", "__stdpar_free"}, + 
{"__builtin_malloc", "__stdpar_malloc"}, + {"__builtin_operator_delete", "__stdpar_operator_delete"}, + {"__builtin_operator_new", "__stdpar_operator_new"}, + {"__builtin_realloc", "__stdpar_realloc"}, + {"__libc_calloc", "__stdpar_calloc"}, + {"__libc_free", "__stdpar_free"}, + {"__libc_malloc", "__stdpar_malloc"}, + {"__libc_memalign", "__stdpar_aligned_alloc"}, + {"__libc_realloc", "__stdpar_realloc"} +}; + +PreservedAnalyses +StdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) { /* Redirects all uses of each named function listed in ReplaceMap to its replacement when M contains it, else emits a DS_Warning; then redirects uses of __stdpar_hidden_free (if present) to __libc_free and erases it. Preserves no analyses. */ + SmallDenseMap AllocReplacements(std::cbegin(ReplaceMap), + std::cend(ReplaceMap)); + + for (auto &&F : M) { + if (!F.hasName()) + continue; + if (!AllocReplacements.contains(F.getName())) + continue; + + if (auto R = M.getFunction(AllocReplacements[F.getName()])) { + F.replaceAllUsesWith(R); + } else { + std::string W; + raw_string_ostream OS(W); + + OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()] + << ". 
Tried to run the allocation interposition pass without the " + << "replacement functions available."; + + F.getContext().diagnose(DiagnosticInfoUnsupported(F, W, + F.getSubprogram(), + DS_Warning)); + } + } + + if (auto F = M.getFunction("__stdpar_hidden_free")) { + auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(), + F->getAttributes()); + F->replaceAllUsesWith(LibcFree.getCallee()); + + eraseFromModule(*F); + } + + return PreservedAnalyses::none(); +} Index: llvm/test/Transforms/StdPar/accelerator-code-selection.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/StdPar/accelerator-code-selection.ll @@ -0,0 +1,116 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=stdpar-select-accelerator-code \ +; RUN: %s | FileCheck %s + +$_ZNK8CallableclEPi = comdat any +$_ZNK8CallableclEPf = comdat any +$_ZNK8Callable6mem_fnEPKi = comdat any +$_ZN8Callable13static_mem_fnEPKi = comdat any +; CHECK-NOT: $_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf 
+$_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf = comdat any +; CHECK-NOT: $_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf +$_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf = comdat any + +%struct.Callable = type { [64 x i8] } + +; CHECK-NOT: @should_be_removed +@llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr @should_be_removed], section "llvm.metadata" + +define void @should_be_removed(ptr %p) { + ret void +} + +declare void @llvm.trap() + +; CHECK: define {{.*}} @called_via_chain +define void @called_via_chain(ptr %p) { + entry: + %tobool.not = icmp eq ptr %p, null + br i1 %tobool.not, label %if.then, label %if.end + + if.then: + tail call void @llvm.trap() + unreachable + + if.end: + ret void +} + +; CHECK: define {{.*}} @directly_called +define void @directly_called(ptr %p) { + tail call void @called_via_chain(ptr %p) + ret void +} + +; CHECK: define {{.*}} amdgpu_kernel {{.*}} @accelerator_execution_root +define hidden amdgpu_kernel void @accelerator_execution_root(ptr %p) { + tail call void @directly_called(ptr %p) + ret void +} + +; CHECK-NOT: @defined_elsewhere_should_be_removed +declare void @defined_elsewhere_should_be_removed(ptr) + +; CHECK: declare {{.*}} @defined_elsewhere_directly_called +declare void @defined_elsewhere_directly_called(ptr) + +; CHECK: define {{.*}} amdgpu_kernel {{.*}} @another_accelerator_execution_root +define hidden amdgpu_kernel void @another_accelerator_execution_root(ptr %p) { + tail call void @defined_elsewhere_directly_called(ptr %p) + ret void +} + +; Also test passing a callable object (functor / lambda) to a kernel, which is +; the common pattern for customising algorithms. 
+ +; CHECK: define {{.*}} amdgpu_kernel {{.*}} @_Z22accelerator_execution_root_taking_callablePi8Callable +define hidden amdgpu_kernel void @_Z22accelerator_execution_root_taking_callablePi8Callable(ptr noundef %p, ptr addrspace(4) nocapture readonly byref(%struct.Callable) align 8 %callable) { + %callable_in_generic = addrspacecast ptr addrspace(4) %callable to ptr + call void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %callable_in_generic, ptr noundef %p) + + ret void +} + +; CHECK: define {{.*}} @_ZNK8CallableclEPi +define linkonce_odr dso_local void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) { + call void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p) + + ret void +} + +; CHECK: define {{.*}} @_ZNK8Callable6mem_fnEPKi +define linkonce_odr dso_local void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p) { + call void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p) + + ret void +} + +; CHECK: define {{.*}} @_ZN8Callable13static_mem_fnEPKi +define linkonce_odr dso_local void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p) { + ret void +} + +; CHECK-NOT: define {{.*}} @_Z26non_kernel_taking_callablePf8Callable +define dso_local void @_Z26non_kernel_taking_callablePf8Callable(ptr noundef %p, ptr noundef byval(%struct.Callable) align 8 %callable) { + call void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %callable, ptr noundef %p) + + ret void +} + +; CHECK-NOT: define {{.*}} @_ZNK8CallableclEPf +define linkonce_odr dso_local void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) { + call void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) + + ret void +} + +; CHECK-NOT: @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf +define linkonce_odr 
dso_local void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) { + call void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p) + + ret void +} + +; CHECK-NOT: @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf +define linkonce_odr dso_local void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p) { + ret void +} Index: llvm/test/Transforms/StdPar/allocation-interposition.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/StdPar/allocation-interposition.ll @@ -0,0 +1,221 @@ +; RUN: opt -S -passes=stdpar-interpose-alloc %s | FileCheck %s + +%"struct.std::nothrow_t" = type { i8 } + +@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1 + +declare ptr @__stdpar_aligned_alloc(i64, i64) + +declare ptr @__stdpar_malloc(i64) + +declare ptr @__stdpar_calloc(i64, i64) + +declare i32 @__stdpar_posix_aligned_alloc(ptr, i64, i64) + +declare void @__stdpar_hidden_free(ptr) + +declare ptr @__stdpar_realloc(ptr, i64) + +declare ptr @__stdpar_realloc_array(ptr, i64, i64) + +declare void @__stdpar_free(ptr) + +declare ptr @__stdpar_operator_new_aligned(i64, i64) + +declare ptr @__stdpar_operator_new(i64) + +declare ptr @__stdpar_operator_new_nothrow(i64, %"struct.std::nothrow_t") + +declare ptr @__stdpar_operator_new_aligned_nothrow(i64, i64, %"struct.std::nothrow_t") + +declare void @__stdpar_operator_delete_aligned_sized(ptr, i64, i64) + +declare void @__stdpar_operator_delete(ptr) + +declare void @__stdpar_operator_delete_aligned(ptr, i64) + +declare void @__stdpar_operator_delete_sized(ptr, i64) + +define dso_local noundef i32 @allocs() { + ; CHECK: %1 = call noalias align 8 ptr @__stdpar_aligned_alloc(i64 noundef 8, i64 noundef 42) + %1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %1) 
+ call void @free(ptr noundef %1) + + ; CHECK: %2 = call noalias ptr @__stdpar_calloc(i64 noundef 1, i64 noundef 42) + %2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %2) + call void @free(ptr noundef %2) + + ; CHECK: %3 = call noalias ptr @__stdpar_malloc(i64 noundef 42) + %3 = call noalias ptr @malloc(i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %3) + call void @free(ptr noundef %3) + + ; CHECK: %4 = call noalias align 8 ptr @__stdpar_aligned_alloc(i64 noundef 8, i64 noundef 42) + %4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %4) + call void @free(ptr noundef %4) + + %tmp = alloca ptr, align 8 + ; CHECK: %5 = call i32 @__stdpar_posix_aligned_alloc(ptr noundef %tmp, i64 noundef 8, i64 noundef 42) + %5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %tmp) + call void @free(ptr noundef %tmp) + + ; CHECK: %6 = call noalias ptr @__stdpar_malloc(i64 noundef 42) + %6 = call noalias ptr @malloc(i64 noundef 42) + ; CHECK: %7 = call ptr @__stdpar_realloc(ptr noundef %6, i64 noundef 42) + %7 = call ptr @realloc(ptr noundef %6, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %7) + call void @free(ptr noundef %7) + + ; CHECK: %8 = call noalias ptr @__stdpar_calloc(i64 noundef 1, i64 noundef 42) + %8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: %9 = call ptr @__stdpar_realloc_array(ptr noundef %8, i64 noundef 1, i64 noundef 42) + %9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %9) + call void @free(ptr noundef %9) + + ; CHECK: %10 = call noalias noundef nonnull ptr @__stdpar_operator_new(i64 noundef 1) + %10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1) + ; CHECK: call void @__stdpar_operator_delete(ptr noundef %10) + call void 
@_ZdlPv(ptr noundef %10) + + ; CHECK: %11 = call noalias noundef nonnull align 8 ptr @__stdpar_operator_new_aligned(i64 noundef 1, i64 noundef 8) + %11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8) + ; CHECK: call void @__stdpar_operator_delete_aligned(ptr noundef %11, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8) + + ; CHECK: %12 = call noalias noundef ptr @__stdpar_operator_new_nothrow(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__stdpar_operator_delete(ptr noundef %12) + call void @_ZdlPv(ptr noundef %12) + + ; CHECK: %13 = call noalias noundef align 8 ptr @__stdpar_operator_new_aligned_nothrow(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__stdpar_operator_delete_aligned(ptr noundef %13, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8) + + ; CHECK: %14 = call noalias noundef nonnull ptr @__stdpar_operator_new(i64 noundef 42) + %14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42) + ; CHECK: call void @__stdpar_operator_delete(ptr noundef %14) + call void @_ZdaPv(ptr noundef %14) + + ; CHECK: %15 = call noalias noundef nonnull align 8 ptr @__stdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8) + %15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8) + ; CHECK: call void @__stdpar_operator_delete_aligned(ptr noundef %15, i64 noundef 8) + call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8) + + ; CHECK: %16 = call noalias noundef ptr 
@__stdpar_operator_new_nothrow(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__stdpar_operator_delete(ptr noundef %16) + call void @_ZdaPv(ptr noundef %16) + + ; CHECK: %17 = call noalias noundef align 8 ptr @__stdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__stdpar_operator_delete_aligned(ptr noundef %17, i64 noundef 8) + call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8) + + ; CHECK: %18 = call ptr @__stdpar_calloc(i64 noundef 1, i64 noundef 42) + %18 = call ptr @calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %18) + call void @free(ptr noundef %18) + + ; CHECK: %19 = call ptr @__stdpar_malloc(i64 noundef 42) + %19 = call ptr @malloc(i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %19) + call void @free(ptr noundef %19) + + ; CHECK: %20 = call noalias noundef nonnull ptr @__stdpar_operator_new(i64 noundef 42) + %20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42) + ; CHECK: call void @__stdpar_operator_delete(ptr noundef %20) + call void @_ZdlPv(ptr noundef %20) + + ; CHECK: %21 = call noalias noundef nonnull align 8 ptr @__stdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8) + %21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8) + ; CHECK: call void @__stdpar_operator_delete_aligned(ptr noundef %21, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8) + + ; CHECK: %22 = call noalias noundef ptr @__stdpar_operator_new_nothrow(i64 noundef 42, ptr noundef 
nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__stdpar_operator_delete(ptr noundef %22) + call void @_ZdlPv(ptr noundef %22) + + ; CHECK: %23 = call noalias noundef align 8 ptr @__stdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + %23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow) + ; CHECK: call void @__stdpar_operator_delete_aligned(ptr noundef %23, i64 noundef 8) + call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8) + + ; CHECK: %24 = call ptr @__stdpar_malloc(i64 noundef 42) + %24 = call ptr @malloc(i64 noundef 42) + ; CHECK: %25 = call ptr @__stdpar_realloc(ptr noundef %24, i64 noundef 41) + %25 = call ptr @realloc(ptr noundef %24, i64 noundef 41) + ; CHECK: call void @__stdpar_free(ptr noundef %25) + call void @free(ptr noundef %25) + + ; CHECK: %26 = call ptr @__stdpar_calloc(i64 noundef 1, i64 noundef 42) + %26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %26) + call void @__libc_free(ptr noundef %26) + + ; CHECK: %27 = call ptr @__stdpar_malloc(i64 noundef 42) + %27 = call ptr @__libc_malloc(i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %27) + call void @__libc_free(ptr noundef %27) + + ; CHECK: %28 = call ptr @__stdpar_aligned_alloc(i64 noundef 8, i64 noundef 42) + %28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42) + ; CHECK: call void @__stdpar_free(ptr noundef %28) + call void @__libc_free(ptr noundef %28) + + ret i32 0 +} + +declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef) + +declare void @free(ptr noundef) + +declare noalias ptr @calloc(i64 noundef, i64 noundef) + +declare noalias ptr 
@malloc(i64 noundef) + +declare noalias ptr @memalign(i64 noundef, i64 noundef) + +declare i32 @posix_memalign(ptr noundef, i64 noundef, i64 noundef) + +declare ptr @realloc(ptr noundef, i64 noundef) + +declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef) + +declare noundef nonnull ptr @_Znwm(i64 noundef) + +declare void @_ZdlPv(ptr noundef) + +declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef) + +declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef) + +declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) + +declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) + +declare noundef nonnull ptr @_Znam(i64 noundef) + +declare void @_ZdaPv(ptr noundef) + +declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef) + +declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 noundef) + +declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) + +declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1)) + +declare ptr @__libc_calloc(i64 noundef, i64 noundef) + +declare void @__libc_free(ptr noundef) + +declare ptr @__libc_malloc(i64 noundef) + +declare ptr @__libc_memalign(i64 noundef, i64 noundef) Index: llvm/test/Transforms/StdPar/allocation-no-interposition.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/StdPar/allocation-no-interposition.ll @@ -0,0 +1,161 @@ +; RUN: opt < %s -passes=stdpar-interpose-alloc -S 2>&1 | FileCheck %s + +; CHECK: warning: {{.*}} aligned_alloc {{.*}} cannot be interposed, missing: __stdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available. 
+; CHECK: warning: {{.*}} free {{.*}} cannot be interposed, missing: __stdpar_free. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} calloc {{.*}} cannot be interposed, missing: __stdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} malloc {{.*}} cannot be interposed, missing: __stdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} memalign {{.*}} cannot be interposed, missing: __stdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} posix_memalign {{.*}} cannot be interposed, missing: __stdpar_posix_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} realloc {{.*}} cannot be interposed, missing: __stdpar_realloc. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} reallocarray {{.*}} cannot be interposed, missing: __stdpar_realloc_array. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _Znwm {{.*}} cannot be interposed, missing: __stdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZdlPv {{.*}} cannot be interposed, missing: __stdpar_operator_delete. Tried to run the allocation interposition pass without the replacement functions available. +; CHECK: warning: {{.*}} _ZnwmSt11align_val_t {{.*}} cannot be interposed, missing: __stdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available. 
+; CHECK: warning: {{.*}} _ZdlPvSt11align_val_t {{.*}} cannot be interposed, missing: __stdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _ZnwmRKSt9nothrow_t {{.*}} cannot be interposed, missing: __stdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _ZnwmSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __stdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _Znam {{.*}} cannot be interposed, missing: __stdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _ZdaPv {{.*}} cannot be interposed, missing: __stdpar_operator_delete. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _ZnamSt11align_val_t {{.*}} cannot be interposed, missing: __stdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _ZdaPvSt11align_val_t {{.*}} cannot be interposed, missing: __stdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _ZnamRKSt9nothrow_t {{.*}} cannot be interposed, missing: __stdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} _ZnamSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __stdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} __libc_calloc {{.*}} cannot be interposed, missing: __stdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} __libc_free {{.*}} cannot be interposed, missing: __stdpar_free. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} __libc_malloc {{.*}} cannot be interposed, missing: __stdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
+; CHECK: warning: {{.*}} __libc_memalign {{.*}} cannot be interposed, missing: __stdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
+; Stand-in for std::nothrow_t and the std::nothrow tag object that the nothrow operator new calls below take by reference.
+%"struct.std::nothrow_t" = type { i8 }
+
+@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1
+; Single function that calls every allocation/deallocation entry point named in the expected warnings at the top of the file.
+define dso_local noundef i32 @allocs() {
+  %1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42)
+  call void @free(ptr noundef %1)
+
+  %2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
+  call void @free(ptr noundef %2)
+
+  %3 = call noalias ptr @malloc(i64 noundef 42)
+  call void @free(ptr noundef %3)
+
+  %4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42)
+  call void @free(ptr noundef %4)
+
+  %tmp = alloca ptr, align 8
+  %5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
+  call void @free(ptr noundef %tmp) ; NOTE(review): frees the alloca slot %tmp itself, not the pointer posix_memalign stores into it; harmless for a diagnostics-only test, but not a realistic call sequence
+
+  %6 = call noalias ptr @malloc(i64 noundef 42)
+  %7 = call ptr @realloc(ptr noundef %6, i64 noundef 42)
+  call void @free(ptr noundef %7)
+
+  %8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
+  %9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42)
+  call void @free(ptr noundef %9)
+; Scalar operator new/delete (Itanium-mangled): plain, align_val_t, nothrow_t, and align_val_t + nothrow_t.
+  %10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1)
+  call void @_ZdlPv(ptr noundef %10)
+
+  %11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8)
+
+  %12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdlPv(ptr noundef %12)
+
+  %13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8)
+; Array operator new[]/delete[] in the same four flavours.
+  %14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42)
+  call void @_ZdaPv(ptr noundef %14)
+
+  %15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8)
+  call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8)
+
+  %16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdaPv(ptr noundef %16)
+
+  %17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8)
+; Same callees as earlier, with different call-site attributes or size operands; the reason for the repetition is not evident from this file.
+  %18 = call ptr @calloc(i64 noundef 1, i64 noundef 42)
+  call void @free(ptr noundef %18)
+
+  %19 = call ptr @malloc(i64 noundef 42)
+  call void @free(ptr noundef %19)
+
+  %20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42)
+  call void @_ZdlPv(ptr noundef %20)
+
+  %21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8)
+
+  %22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdlPv(ptr noundef %22)
+
+  %23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8)
+
+  %24 = call ptr @malloc(i64 noundef 42)
+  %25 = call ptr @realloc(ptr noundef %24, i64 noundef 41)
+  call void @free(ptr noundef %25)
+; __libc_-prefixed variants of calloc/malloc/memalign/free (NOTE(review): presumably glibc's internal aliases - confirm).
+  %26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42)
+  call void @__libc_free(ptr noundef %26)
+
+  %27 = call ptr @__libc_malloc(i64 noundef 42)
+  call void @__libc_free(ptr noundef %27)
+
+  %28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42)
+  call void @__libc_free(ptr noundef %28)
+
+  ret i32 0
+}
+; External declarations, listed in the same order as the expected warnings; no __stdpar_* replacement is declared anywhere in this module.
+declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef)
+
+declare void @free(ptr noundef)
+
+declare noalias ptr @calloc(i64 noundef, i64 noundef)
+
+declare noalias ptr @malloc(i64 noundef)
+
+declare noalias ptr @memalign(i64 noundef, i64 noundef)
+
+declare i32 @posix_memalign(ptr noundef, i64 noundef, i64 noundef)
+
+declare ptr @realloc(ptr noundef, i64 noundef)
+
+declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef)
+
+declare noundef nonnull ptr @_Znwm(i64 noundef)
+
+declare void @_ZdlPv(ptr noundef)
+
+declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef)
+
+declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef)
+
+declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
+
+declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
+
+declare noundef nonnull ptr @_Znam(i64 noundef)
+
+declare void @_ZdaPv(ptr noundef)
+
+declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef)
+
+declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 noundef)
+
+declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
+
+declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
+
+declare ptr @__libc_calloc(i64 noundef, i64 noundef)
+
+declare void @__libc_free(ptr noundef)
+
+declare ptr @__libc_malloc(i64 noundef)
+
+declare ptr @__libc_memalign(i64 noundef, i64 noundef)
Index: llvm/test/Transforms/StdPar/unsupported-builtins.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/StdPar/unsupported-builtins.ll
@@ -0,0 +1,11 @@
+; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=stdpar-select-accelerator-code \
+; RUN: %s 2>&1 | FileCheck %s
+; Negative test: opt is wrapped in "not", so the pass must fail, and the diagnostic below must name the offending builtin.
+; CHECK: error: {{.*}} in function foo void (): Accelerator does not support the __builtin_ia32_pause function
+define amdgpu_kernel void @foo() { ; kernel entry point, so the call below counts as accelerator code
+entry:
+  call void @__builtin_ia32_pause__stdpar_unsupported() ; NOTE(review): presumably the "__stdpar_unsupported" suffix is what the pass keys on; the expected diagnostic names the callee without it
+  ret void
+}
+
+declare void @__builtin_ia32_pause__stdpar_unsupported()