diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5060,7 +5060,8 @@ if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); if (getTarget().getTriple().getArch() == Triple::amdgcn && - getLangOpts().HIP) + (getLangOpts().HIP || (getLangOpts().OpenMPIsDevice && + getLangOpts().OpenMPTargetNewRuntime))) return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue); break; case Builtin::BI__builtin_canonicalize: diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -21,24 +21,30 @@ using namespace clang; using namespace CodeGen; -static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { +static llvm::Function *GetVprintfDeclaration(CodeGenModule &CGM) { + bool UsesNewOpenMPDeviceRuntime = CGM.getLangOpts().OpenMPIsDevice && + CGM.getLangOpts().OpenMPTargetNewRuntime; + const char *Name = + UsesNewOpenMPDeviceRuntime ? "__llvm_omp_vprintf" : "vprintf"; + llvm::Module &M = CGM.getModule(); llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), llvm::Type::getInt8PtrTy(M.getContext())}; llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); - if (auto* F = M.getFunction("vprintf")) { + if (auto *F = M.getFunction(Name)) { // Our CUDA system header declares vprintf with the right signature, so // nobody else should have been able to declare vprintf with a bogus - // signature. + // signature. The OpenMP device runtime provides a wrapper around vprintf + // which we use here. The signature should match though. assert(F->getFunctionType() == VprintfFuncType); return F; } - // vprintf doesn't already exist; create a declaration and insert it into the - // module. 
+ // vprintf, or for OpenMP device offloading the vprintf wrapper, doesn't + // already exist; create a declaration and insert it into the module. return llvm::Function::Create( - VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); + VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M); } // Transforms a call to printf into a call to the NVPTX vprintf syscall (which @@ -117,7 +123,7 @@ } // Invoke vprintf and return. - llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); + llvm::Function *VprintfFunc = GetVprintfDeclaration(CGM); return RValue::get(Builder.CreateCall( VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr})); } @@ -130,6 +136,12 @@ E->getBuiltinCallee() == Builtin::BI__builtin_printf); assert(E->getNumArgs() >= 1); // printf always has at least one arg. + // For OpenMP target offloading we go with a modified nvptx printf method. + // Basically creating calls to __llvm_omp_vprintf with the arguments and + // dealing with the details in the device runtime itself. + if (getLangOpts().OpenMPIsDevice && getLangOpts().OpenMPTargetNewRuntime) + return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); + CallArgList CallArgs; EmitCallArgs(CallArgs, E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), diff --git a/openmp/libomptarget/DeviceRTL/include/Debug.h b/openmp/libomptarget/DeviceRTL/include/Debug.h --- a/openmp/libomptarget/DeviceRTL/include/Debug.h +++ b/openmp/libomptarget/DeviceRTL/include/Debug.h @@ -32,17 +32,12 @@ /// macro. /// { -#ifndef __AMDGCN__ extern "C" { int printf(const char *format, ...); } #define PRINTF(fmt, ...) (void)printf(fmt, __VA_ARGS__); #define PRINT(str) PRINTF("%s", str) -#else -#define PRINTF(fmt, ...) 
-#define PRINT(str) -#endif ///} diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -352,6 +352,9 @@ int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size); int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size); ///} + +/// Printf +int32_t __llvm_omp_vprintf(const char *Format, void *Arguments); } #endif diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -35,6 +35,15 @@ assertion); __builtin_trap(); } + +// We do not have a vprintf implementation for AMD GPU yet so we use a stub. +#pragma omp begin declare variant match(device = {arch(amdgcn)}) +int32_t vprintf(const char *, void *) { return 0; } +#pragma omp end declare variant + +int32_t __llvm_omp_vprintf(const char *Format, void *Arguments) { + return vprintf(Format, Arguments); +} } /// Current indentation level for the function trace. Only accessed by thread 0.