Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -1383,15 +1383,47 @@ /// A list of functions that are available in NVIDIA's libdevice. const std::set CUDALibDeviceFunctions = { - "exp", "expf", "expl", "cos", "cosf", "sqrt", - "sqrtf", "copysign", "copysignf", "copysignl", "log", "logf"}; + "exp", "expf", "expl", "cos", "cosf", "sqrt", "sqrtf", + "copysign", "copysignf", "copysignl", "log", "logf", "powi"}; + +/// A list of intrinsics that are unsupported by the NVPTX backend. +const std::set NVPTXUnsupportedIntrinsics = {"exp", "powi"}; + +/// Return <name> from the full "llvm.<name>.<suffix>" name. +/// +/// Return "" if function is not an intrinsic. +std::string getStrippedIntrinsicName(const Function *F) { + assert(F && "invalid function pointer"); + const StringRef FnName = F->getName(); + + if (F->isIntrinsic() && FnName.startswith("llvm.")) { + const size_t BeginSeparator = FnName.find("."); + const size_t EndSeparator = FnName.rfind("."); + return std::string(FnName.slice(BeginSeparator + 1, EndSeparator)); + } + return ""; +} /// Return the corresponding CUDA libdevice function name for @p F. +/// Note that this function will try to convert intrinsics in the list +/// NVPTXUnsupportedIntrinsics into libdevice functions. +/// This is because some intrinsics such as `exp` +/// are not supported by the NVPTX backend. +/// If this restriction of the backend is lifted, we should refactor our code +/// so that we use intrinsics whenever possible. /// /// Return "" if we are not compiling for CUDA. 
std::string getCUDALibDeviceFuntion(Function *F) { - if (CUDALibDeviceFunctions.count(F->getName())) - return std::string("__nv_") + std::string(F->getName()); + const std::string FnName = [&] { + const std::string IntrinsicName = getStrippedIntrinsicName(F); + if (NVPTXUnsupportedIntrinsics.count(IntrinsicName)) + return IntrinsicName; + + return std::string(F->getName()); + }(); + + if (CUDALibDeviceFunctions.count(FnName)) + return "__nv_" + FnName; return ""; } @@ -1409,7 +1441,7 @@ return F->isIntrinsic() && (Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") || - Name.startswith("llvm.copysign") || Name.startswith("llvm.powi")); + Name.startswith("llvm.copysign")); } /// Do not take `Function` as a subtree value.