Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -78,6 +78,7 @@ #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -1108,6 +1109,10 @@ return; } + if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice && + LangOpts.HIPStdParInterposeAlloc) + MPM.addPass(HipStdParAllocationInterpositionPass()); + // Now that we have all of the passes ready, run them. { PrettyStackTraceString CrashInfo("Optimizer"); Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -2327,6 +2327,19 @@ return nullptr; } +static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, + const FunctionDecl *FD) { + auto Name = FD->getNameAsString() + "__hipstdpar_unsupported"; + auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD); + auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + + SmallVector Args; + for (auto &&FormalTy : FnTy->params()) + Args.push_back(llvm::PoisonValue::get(FormalTy)); + + return RValue::get(CGF->Builder.CreateCall(UBF, Args)); +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -5765,6 +5778,9 @@ llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); } + if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice) + return EmitHipStdParUnsupportedBuiltin(this, FD); + ErrorUnsupported(E, "builtin function"); // Unknown builtin, for now just dump it out and return undef. @@ -5775,6 +5791,16 @@ unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { + // When compiling in HipStdPar mode we have to be conservative in rejecting + // target specific features in the FE, and defer the possible error to the + // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is + // referenced by an accelerator executable function, we emit an error. + // Returning nullptr here leads to the builtin being handled in + // EmitStdParUnsupportedBuiltin. + if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice && + Arch != CGF->getTarget().getTriple().getArch()) + return nullptr; + switch (Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -2420,6 +2420,24 @@ } } +static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF, + const AsmStmt &S) { + constexpr auto Name = "__ASM__hipstdpar_unsupported"; + + StringRef Asm; + if (auto GCCAsm = dyn_cast(&S)) + Asm = GCCAsm->getAsmString()->getString(); + + auto &Ctx = CGF->CGM.getLLVMContext(); + + auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm); + auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), + {StrTy->getType()}, false); + auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + + CGF->Builder.CreateCall(UBF, {StrTy}); +} + void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Pop all cleanup blocks at the end of the asm statement. CodeGenFunction::RunCleanupsScope Cleanups(*this); @@ -2431,27 +2449,38 @@ SmallVector OutputConstraintInfos; SmallVector InputConstraintInfos; - for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) { + bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice; + bool IsValidTargetAsm = true; + for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) { StringRef Name; if (const GCCAsmStmt *GAS = dyn_cast(&S)) Name = GAS->getOutputName(i); TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name); bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid; - assert(IsValid && "Failed to parse output constraint"); + if (IsHipStdPar && !IsValid) + IsValidTargetAsm = false; + else + assert(IsValid && "Failed to parse output constraint"); OutputConstraintInfos.push_back(Info); } - for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) { + for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) { StringRef Name; if (const GCCAsmStmt *GAS = dyn_cast(&S)) Name = GAS->getInputName(i); TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name); bool IsValid = getTarget().validateInputConstraint(OutputConstraintInfos, Info); - assert(IsValid && "Failed to parse input constraint"); (void)IsValid; + if (IsHipStdPar && !IsValid) + IsValidTargetAsm = false; + else + assert(IsValid && "Failed to parse input constraint"); InputConstraintInfos.push_back(Info); } + if (!IsValidTargetAsm) + return EmitHipStdParUnsupportedAsm(this, S); + std::string Constraints; std::vector ResultRegDests; Index: clang/lib/CodeGen/CMakeLists.txt =================================================================== --- clang/lib/CodeGen/CMakeLists.txt +++ clang/lib/CodeGen/CMakeLists.txt @@ -11,6 +11,7 @@ Extensions FrontendHLSL FrontendOpenMP + HIPStdPar IPO IRPrinter IRReader Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -2594,10 +2594,15 @@ std::string MissingFeature; llvm::StringMap CallerFeatureMap; CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD); + // When compiling in HipStdPar mode we have to be conservative in rejecting + // target specific features in the FE, and defer the possible error to the + // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is + // referenced by an accelerator executable function, we emit an error. + bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice; if (BuiltinID) { StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); if (!Builtin::evaluateRequiredTargetFeatures( - FeatureList, CallerFeatureMap)) { + FeatureList, CallerFeatureMap) && !IsHipStdPar) { CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) << TargetDecl->getDeclName() << FeatureList; @@ -2630,7 +2635,7 @@ return false; } return true; - })) + }) && !IsHipStdPar) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } else if (!FD->isMultiVersion() && FD->hasAttr()) { @@ -2639,7 +2644,8 @@ for (const auto &F : CalleeFeatureMap) { if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) || - !CallerFeatureMap.find(F.getKey())->getValue())) + !CallerFeatureMap.find(F.getKey())->getValue()) && + !IsHipStdPar) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << F.getKey(); } Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -3526,7 +3526,7 @@ GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); Emitter.finalize(GV); - return ConstantAddress(GV, GV->getValueType(), Alignment); + return ConstantAddress(GV, GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { @@ -3585,7 +3585,10 @@ !Global->hasAttr() && !Global->hasAttr() && !Global->getType()->isCUDADeviceBuiltinSurfaceType() && - !Global->getType()->isCUDADeviceBuiltinTextureType()) + !Global->getType()->isCUDADeviceBuiltinTextureType() && + !(LangOpts.HIPStdPar && + isa(Global) && + !Global->hasAttr())) return; } else { // We need to emit host-side 'shadows' for all global Index: clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=NO-HIPSTDPAR-DEV %s + +// RUN: %clang_cc1 --hipstdpar -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=HIPSTDPAR-DEV %s + +#define __device__ __attribute__((device)) + +// NO-HIPSTDPAR-DEV-NOT: define {{.*}} void @foo({{.*}}) +// HIPSTDPAR-DEV: define {{.*}} void @foo({{.*}}) +extern "C" void foo(float *a, float b) { + *a = b; +} + +// NO-HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}}) +// HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}}) +extern "C" __device__ void bar(float *a, float b) { + *a = b; +} Index: clang/test/CodeGenHipStdPar/unsupported-ASM.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenHipStdPar/unsupported-ASM.cpp @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \ +// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#define __global__ __attribute__((global)) + +__global__ void foo(int i) { + asm ("addl %2, %1; seto %b0" : "=q" (i), "+g" (i) : "r" (i)); +} + +// CHECK: declare void @__ASM__hipstdpar_unsupported([{{.*}}]) Index: clang/test/CodeGenHipStdPar/unsupported-builtins.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenHipStdPar/unsupported-builtins.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \ +// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#define __global__ __attribute__((global)) + +__global__ void foo() { return __builtin_ia32_pause(); } + +// CHECK: declare void @__builtin_ia32_pause__hipstdpar_unsupported()