Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -2784,11 +2784,9 @@ } // By default, we produce an action for each device arch. - if (!Relocatable || CurPhase < phases::Backend || CompileDeviceOnly) { - for (Action *&A : CudaDeviceActions) - A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A, - AssociatedOffloadKind); - } + for (Action *&A : CudaDeviceActions) + A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A, + AssociatedOffloadKind); return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host : ABRT_Success; Index: clang/lib/Driver/ToolChains/AMDGPU.h =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.h +++ clang/lib/Driver/ToolChains/AMDGPU.h @@ -80,6 +80,11 @@ static bool isWave64(const llvm::opt::ArgList &DriverArgs, llvm::AMDGPU::GPUKind Kind); + /// Needed for using lto. + bool HasNativeLLVMSupport() const override { + return true; + } + }; class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { Index: clang/lib/Driver/ToolChains/CommonArgs.cpp =================================================================== --- clang/lib/Driver/ToolChains/CommonArgs.cpp +++ clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -204,7 +204,10 @@ /// Get the (LLVM) name of the R600 gpu we are targeting. 
static std::string getR600TargetGPU(const ArgList &Args) { - if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + Arg *A = Args.getLastArg(options::OPT_march_EQ); + if (!A) + A = Args.getLastArg(options::OPT_mcpu_EQ); + if (A) { const char *GPUName = A->getValue(); return llvm::StringSwitch<const char *>(GPUName) .Cases("rv630", "rv635", "r600") Index: clang/lib/Driver/ToolChains/HIP.h =================================================================== --- clang/lib/Driver/ToolChains/HIP.h +++ clang/lib/Driver/ToolChains/HIP.h @@ -38,34 +38,10 @@ const char *LinkingOutput) const override; private: - /// \return llvm-link output file name. - const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix) const; - - /// \return opt output file name. - const char *constructOptCommand(Compilation &C, const JobAction &JA, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix, - const char *InputFileName) const; - - /// \return llc output file name. 
- const char *constructLlcCommand(Compilation &C, const JobAction &JA, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix, - const char *InputFileName, - bool OutputIsAsm = false) const; void constructLldCommand(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const InputInfo &Output, - const llvm::opt::ArgList &Args, - const char *InputFileName) const; + const llvm::opt::ArgList &Args) const; }; } // end namespace AMDGCN Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -49,152 +49,18 @@ } D.Diag(diag::err_drv_no_such_file) << BCName; } - -static const char *getOutputFileName(Compilation &C, StringRef Base, - const char *Postfix, - const char *Extension) { - const char *OutputFileName; - if (C.getDriver().isSaveTempsEnabled()) { - OutputFileName = - C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension); - } else { - std::string TmpName = - C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension); - OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName)); - } - return OutputFileName; -} - -static void addOptLevelArgs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, - bool IsLlc = false) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - StringRef OOpt = "3"; - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - OOpt = "3"; - else if (A->getOption().matches(options::OPT_O0)) - OOpt = "0"; - else if (A->getOption().matches(options::OPT_O)) { - // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3 - // so we map -Os/-Oz to -O2. - // Only clang supports -Og, and maps it to -O1. - // We map anything else to -O2. 
- OOpt = llvm::StringSwitch<const char *>(A->getValue()) - .Case("1", "1") - .Case("2", "2") - .Case("3", "3") - .Case("s", IsLlc ? "2" : "s") - .Case("z", IsLlc ? "2" : "z") - .Case("g", "1") - .Default("2"); - } - CmdArgs.push_back(Args.MakeArgString("-O" + OOpt)); - } -} } // namespace -const char *AMDGCN::Linker::constructLLVMLinkCommand( - Compilation &C, const JobAction &JA, const InputInfoList &Inputs, - const ArgList &Args, StringRef SubArchName, - StringRef OutputFilePrefix) const { - ArgStringList CmdArgs; - // Add the input bc's created by compile step. - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - // Add an intermediate output file. - CmdArgs.push_back("-o"); - auto OutputFileName = getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); - CmdArgs.push_back(OutputFileName); - const char *Exec = - Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); - C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); - return OutputFileName; -} - -const char *AMDGCN::Linker::constructOptCommand( - Compilation &C, const JobAction &JA, const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix, const char *InputFileName) const { - // Construct opt command. - ArgStringList OptArgs; - // The input to opt is the output from llvm-link. - OptArgs.push_back(InputFileName); - // Pass optimization arg to opt. 
- addOptLevelArgs(Args, OptArgs); - OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); - OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); - - for (const Arg *A : Args.filtered(options::OPT_mllvm)) { - OptArgs.push_back(A->getValue(0)); - } - - OptArgs.push_back("-o"); - auto OutputFileName = - getOutputFileName(C, OutputFilePrefix, "-optimized", "bc"); - OptArgs.push_back(OutputFileName); - const char *OptExec = - Args.MakeArgString(getToolChain().GetProgramPath("opt")); - C.addCommand(std::make_unique<Command>(JA, *this, OptExec, OptArgs, Inputs)); - return OutputFileName; -} - -const char *AMDGCN::Linker::constructLlcCommand( - Compilation &C, const JobAction &JA, const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix, const char *InputFileName, - bool OutputIsAsm) const { - // Construct llc command. - ArgStringList LlcArgs; - // The input to llc is the output from opt. - LlcArgs.push_back(InputFileName); - // Pass optimization arg to llc. - addOptLevelArgs(Args, LlcArgs, /*IsLlc=*/true); - LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); - LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); - LlcArgs.push_back( - Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj"))); - - // Extract all the -m options - std::vector<llvm::StringRef> Features; - handleTargetFeaturesGroup( - Args, Features, options::OPT_m_amdgpu_Features_Group); - - // Add features to mattr such as xnack - std::string MAttrString = "-mattr="; - for(auto OneFeature : Features) { - MAttrString.append(Args.MakeArgString(OneFeature)); - if (OneFeature != Features.back()) - MAttrString.append(","); - } - if(!Features.empty()) - LlcArgs.push_back(Args.MakeArgString(MAttrString)); - - for (const Arg *A : Args.filtered(options::OPT_mllvm)) { - LlcArgs.push_back(A->getValue(0)); - } - - // Add output filename - LlcArgs.push_back("-o"); - auto LlcOutputFile = - getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? 
"s" : "o"); - LlcArgs.push_back(LlcOutputFile); - const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc")); - C.addCommand(std::make_unique<Command>(JA, *this, Llc, LlcArgs, Inputs)); - return LlcOutputFile; -} - void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const InputInfo &Output, - const llvm::opt::ArgList &Args, - const char *InputFileName) const { + const llvm::opt::ArgList &Args) const { // Construct lld command. // The output from ld.lld is an HSA code object file. ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", - "-shared", "-o", Output.getFilename(), - InputFileName}; + "-shared", "-o", Output.getFilename()}; + for (auto Input : Inputs) + LldArgs.push_back(Input.getFilename()); const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); C.addCommand(std::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs)); } @@ -243,33 +109,7 @@ if (JA.getType() == types::TY_HIP_FATBIN) return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); - assert(Inputs.size()); - if (Inputs.size() == 1 && Inputs[0].getType() == types::TY_Object) - return constructLldCommand(C, JA, Inputs, Output, Args, - Inputs[0].getFilename()); - - assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn && - "Unsupported target"); - - std::string SubArchName = JA.getOffloadingArch(); - assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch"); - - // Prefix for temporary file name. - std::string Prefix = llvm::sys::path::stem(Inputs[0].getFilename()).str(); - if (!C.getDriver().isSaveTempsEnabled()) - Prefix += "-" + SubArchName; - - // Each command outputs different files. 
- const char *LLVMLinkCommand = - constructLLVMLinkCommand(C, JA, Inputs, Args, SubArchName, Prefix); - const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, SubArchName, - Prefix, LLVMLinkCommand); - if (C.getDriver().isSaveTempsEnabled()) - constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand, - /*OutputIsAsm=*/true); - const char *LlcCommand = - constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand); - constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); + return constructLldCommand(C, JA, Inputs, Output, Args); } HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple, @@ -294,8 +134,6 @@ auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); - CC1Args.push_back("-target-cpu"); - CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, Index: clang/test/Driver/amdgpu-toolchain.c =================================================================== --- clang/test/Driver/amdgpu-toolchain.c +++ clang/test/Driver/amdgpu-toolchain.c @@ -9,3 +9,8 @@ // AS_LINK: ld.lld{{.*}} "-shared" // DWARF_VER: "-dwarf-version=4" + +// RUN: %clang -### -target amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \ +// RUN: -flto %s 2>&1 | FileCheck -check-prefix=LTO %s +// LTO: clang{{.*}} "-flto" +// LTO: ld.lld{{.*}} Index: clang/test/Driver/hip-device-compile.hip =================================================================== --- clang/test/Driver/hip-device-compile.hip +++ clang/test/Driver/hip-device-compile.hip @@ -31,9 +31,10 @@ // BC-SAME: "-emit-llvm-bc" // LL-SAME: "-emit-llvm" // ASM-NOT: "-emit-llvm" -// CHECK-SAME: "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" // CHECK-SAME: {{".*lib1.bc"}} +// CHECK-SAME: "-target-cpu" "gfx900" // BC-SAME: "-o" "a.bc" // 
LL-SAME: "-o" "a.ll" // ASM-SAME: "-o" "a.s" Index: clang/test/Driver/hip-phases.hip =================================================================== --- clang/test/Driver/hip-phases.hip +++ clang/test/Driver/hip-phases.hip @@ -30,7 +30,8 @@ // NRD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-hip, [[ARCH]]) // NRD-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image // NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-hip) -// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]]) +// RDC-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) +// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P6]]}, image, (device-hip, [[ARCH]]) // NRD-DAG: [[P12:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P10]]}, ir @@ -59,7 +60,19 @@ // // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2 %s +// RUN: | FileCheck -check-prefixes=BIN2,NRD2,CL2 %s + +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -c 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN2,NRD2 %s + +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN2,RDC2,CL2,RCL2 %s + +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc -c 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN2,RDC2,RC2 %s // BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) // BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) @@ -68,25 +81,34 @@ // BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], 
[[ARCH1:gfx803]]) // BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) // BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) -// BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) -// BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) -// BIN2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) -// BIN2-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image +// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) +// RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) +// RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image +// RC2-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir // BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) // BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) -// BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) -// BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) -// BIN2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) -// BIN2-DAG: [[P16:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, 
(device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) +// RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// RC2-DAG: [[P16:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir -// BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-hip) +// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-hip) -// BIN2-DAG: [[P18:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P17]]}, ir -// BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) +// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P17]]}, ir +// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) +// RDC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) // BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) -// BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) +// CL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) +// RCL2-DAG: [[P22:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P21]]}, "device-hip (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, "device-hip (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// RC2-DAG: [[P22:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]]) // // Test two gpu architecturess up to the assemble phase. @@ -212,3 +234,27 @@ // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) // DASM2-NOT: host + +// +// Test linking two objects with two gpu architectures. 
+// +// RUN: touch %T/obj1.o +// RUN: touch %T/obj2.o +// +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o 2>&1 \ +// RUN: | FileCheck -check-prefixes=L2,NL2 %s +// +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o \ +// RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2 %s +// +// L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object, (host-[[T:hip]]) +// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object, (host-[[T:hip]]) +// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object, (host-[[T:hip]]) +// RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object, (host-[[T:hip]]) +// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T:hip]]) +// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (host-[[T:hip]]) +// RL2-DAG: [[P5:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T:hip]], [[ARCH1:gfx803]]) +// RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T:hip]], [[ARCH2:gfx900]]) +// RL2-DAG: [[P7:[0-9]+]]: offload, "host-[[T:hip]] (x86_64-unknown-linux-gnu)" {[[P4]]}, "device-[[T:hip]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P5]]}, "device-[[T:hip]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image Index: clang/test/Driver/hip-rdc-device-only.hip =================================================================== --- clang/test/Driver/hip-rdc-device-only.hip +++ clang/test/Driver/hip-rdc-device-only.hip @@ -51,9 +51,10 @@ // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" +// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" // COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // COMMON-SAME: 
"-fapply-global-visibility-to-externs" +// COMMON-SAME: "-target-cpu" "gfx803" // EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip" // EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip" // CHECK-SAME: {{.*}} {{".*a.cu"}} @@ -62,9 +63,10 @@ // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" +// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" // COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // COMMON-SAME: "-fapply-global-visibility-to-externs" +// COMMON-SAME: "-target-cpu" "gfx900" // EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip" // EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip" // COMMON-SAME: {{.*}} {{".*a.cu"}} @@ -73,9 +75,10 @@ // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" +// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" // COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // COMMON-SAME: "-fapply-global-visibility-to-externs" +// COMMON-SAME: "-target-cpu" "gfx803" // EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip" // EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip" // COMMON-SAME: {{.*}} {{".*b.hip"}} @@ -84,9 +87,10 @@ // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" +// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" // COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // COMMON-SAME: "-fapply-global-visibility-to-externs" +// COMMON-SAME: "-target-cpu" "gfx900" // EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip" // EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip" // 
COMMON-SAME: {{.*}} {{".*b.hip"}} Index: clang/test/Driver/hip-save-temps.hip =================================================================== --- clang/test/Driver/hip-save-temps.hip +++ clang/test/Driver/hip-save-temps.hip @@ -2,6 +2,11 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target +// -fno-gpu-rdc without -o with -c +// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ +// RUN: --cuda-gpu-arch=gfx900 -c %s 2>&1 | \ +// RUN: FileCheck -check-prefixes=CHECK,NORDC %s + // -fno-gpu-rdc without -o // RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ // RUN: --cuda-gpu-arch=gfx900 %s 2>&1 | \ @@ -12,37 +17,58 @@ // RUN: -o executable --cuda-gpu-arch=gfx900 %s 2>&1 | \ // RUN: FileCheck -check-prefixes=CHECK,NORDC,WOUT %s +// -fgpu-rdc without -o with -c +// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ +// RUN: -fgpu-rdc --cuda-gpu-arch=gfx900 -c %s 2>&1 | \ +// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCC %s + // -fgpu-rdc without -o // RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ // RUN: -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ -// RUN: FileCheck -check-prefixes=CHECK,RDC,RDC-NOUT,NOUT %s +// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-NOUT,NOUT %s // -fgpu-rdc with -o // RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ // RUN: -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \ -// RUN: FileCheck -check-prefixes=CHECK,RDC,RDC-WOUT,WOUT %s - -// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui" -// NORDC: {{".*clang.*"}} {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" -// NORDC: {{".*clang.*"}} {{.*}} "-S" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s" -// NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o" -// NORDC-NOT: {{.*}}llvm-link -// NORDC-NOT: {{.*}}opt -// NORDC-NOT: 
{{.*}}llc -// RDC: {{.*}}llvm-link{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp-linked.bc" -// RDC: {{.*}}opt{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp-optimized.bc" -// RDC: {{.*}}llc{{.*}}"-filetype=asm"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.s" -// RDC: {{.*}}llc{{.*}}"-filetype=obj"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.o" -// NORDC: {{.*}}lld{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out" -// RDC: {{.*}}lld{{.*}}"-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" -// NORDC: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb" -// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" -// NORDC: {{.*}}clang{{.*}}"-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" -// RDC: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" -// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" -// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps{{.*}}.o" -// RDC-NOUT: {{.*}}clang-offload-bundler{{.*}}"-outputs=a.out.hipfb" -// RDC-WOUT: {{.*}}clang-offload-bundler{{.*}}"-outputs=executable.hipfb" -// NOUT: {{.*}}ld{{.*}}"-o" "a.out" -// WOUT: {{.*}}ld{{.*}}"-o" "executable" +// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-WOUT,WOUT %s + +// CHECK: {{".*clang.*"}} "-cc1" {{.*}} "-E" {{.*}} [[CPU:"-target-cpu" "gfx900"]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui" +// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" +// RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.bc" +// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-S" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s" +// RDC: {{".*clang.*"}} "-cc1" {{.*}} 
"-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" +// NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o" +// CHECK-NOT: llvm-link +// CHECK-NOT: opt +// CHECK-NOT: llc +// NORDC: {{.*lld.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out" +// RDCL: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" +// NORDC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb" +// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" +// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// RDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" +// CHECK: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps{{.*}}.o" +// RDCC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.o" +// RDC-NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb" +// RDC-WOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb" +// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" +// WOUT: "{{.*ld.*}}" {{.*}} "-o" "executable" + +// -fgpu-rdc link +// RUN: touch %T/obj1.o +// RUN: touch %T/obj2.o +// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \ +// RUN: --hip-link -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 \ +// RUN: --offload-arch=gfx906 %T/obj1.o %T/obj2.o 2>&1 | \ +// RUN: FileCheck -check-prefixes=LINK %s +// LINK: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=obj1-host-x86_64-unknown-linux-gnu.o,obj1-hip-amdgcn-amd-amdhsa-gfx900.o,obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "-unbundle" +// LINK: "{{.*clang-offload-bundler.*}}" {{.*}} 
"-outputs=obj2-host-x86_64-unknown-linux-gnu.o,obj2-hip-amdgcn-amd-amdhsa-gfx900.o,obj2-hip-amdgcn-amd-amdhsa-gfx906.o" "-unbundle" +// LINK-NOT: llvm-link +// LINK-NOT: opt +// LINK-NOT: llc +// LINK: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o" +// LINK: "{{.*lld.*}}" {{.*}} "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o" +// LINK: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb" +// LINK: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "-T" "executable.lk" Index: clang/test/Driver/hip-toolchain-no-rdc.hip =================================================================== --- clang/test/Driver/hip-toolchain-no-rdc.hip +++ clang/test/Driver/hip-toolchain-no-rdc.hip @@ -7,11 +7,29 @@ // RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ -// RUN: -fuse-ld=lld \ +// RUN: -fuse-ld=lld -nogpuinc \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LINK %s + +// RUN: %clang -### -target x86_64-linux-gnu -fno-gpu-rdc \ +// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ +// RUN: -fuse-ld=lld -nogpuinc -c \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK %s +// RUN: touch %T/a.o +// RUN: touch %T/b.o +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --hip-link --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: -fuse-ld=lld -nogpuinc \ +// RUN: %T/a.o %T/b.o \ +// 
RUN: 2>&1 | FileCheck -check-prefixes=LKONLY %s + // // Compile device code in a.cu to code object for gfx803. // @@ -19,10 +37,11 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_803:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] @@ -40,10 +59,11 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx900" // CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] @@ -77,10 +97,11 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_B_803:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} 
[[B_SRC:".*b.hip"]] @@ -98,10 +119,11 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx900" // CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_B_900:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC]] @@ -132,5 +154,14 @@ // Link host objects. // -// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] -// CHECK-NOT: "-T" "{{.*}}.lk" +// LINK-NOT: llvm-link +// LINK-NOT: opt +// LINK-NOT: llc +// LINK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] +// LINK-NOT: "-T" "{{.*}}.lk" + +// LKONLY-NOT: llvm-link +// LKONLY-NOT: opt +// LKONLY-NOT: llc +// LKONLY: [[LD:".*ld.*"]] {{.*}} "{{.*/a.o}}" "{{.*/b.o}}" +// LKONLY-NOT: "-T" "{{.*}}.lk" Index: clang/test/Driver/hip-toolchain-rdc-separate.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -0,0 +1,112 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -c -### -target x86_64-linux-gnu \ +// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ +// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" 
"x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" +// CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" +// CHECK-SAME: "-fapply-global-visibility-to-externs" +// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-o" "[[A_BC1:.*bc]]" "-x" "hip" +// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" +// CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" +// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-o" "[[A_BC2:.*bc]]" "-x" "hip" +// CHECK-SAME: {{.*}} [[A_SRC]] + +// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" +// CHECK-SAME: {{.*}} "-o" "[[A_OBJ_HOST:.*o]]" "-x" "hip" +// CHECK-SAME: {{.*}} [[A_SRC]] + +// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// CHECK-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900,host-x86_64-unknown-linux-gnu" +// CHECK-SAME: "-outputs=[[A_O:.*a.o]]" "-inputs=[[A_BC1]],[[A_BC2]],[[A_OBJ_HOST]]" + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" +// CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" +// CHECK-SAME: "-fapply-global-visibility-to-externs" +// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-o" "[[B_BC1:.*bc]]" "-x" "hip" +// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] + +// CHECK: [[CLANG]] "-cc1" 
"-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" +// CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" +// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" +// CHECK-SAME: "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-o" "[[B_BC2:.*bc]]" "-x" "hip" +// CHECK-SAME: {{.*}} [[B_SRC]] + +// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" +// CHECK-SAME: {{.*}} "-o" "[[B_OBJ_HOST:.*o]]" "-x" "hip" +// CHECK-SAME: {{.*}} [[B_SRC]] + +// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// CHECK-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900,host-x86_64-unknown-linux-gnu" +// CHECK-SAME: "-outputs=[[B_O:.*b.o]]" "-inputs=[[B_BC1]],[[B_BC2]],[[B_OBJ_HOST]]" + +// RUN: touch %T/a.o +// RUN: touch %T/b.o +// RUN: %clang --hip-link -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ +// RUN: %T/a.o %T/b.o \ +// RUN: 2>&1 | FileCheck -check-prefix=LINK %s + +// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],[[A_BC1:.*o]],[[A_BC2:.*o]]" +// LINK: "-unbundle" + +// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],[[B_BC1:.*o]],[[B_BC2:.*o]]" +// LINK: "-unbundle" + +// LINK-NOT: "*.llvm-link" +// LINK-NOT: ".*opt" +// LINK-NOT: ".*llc" +// LINK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV1:.*.out]]" "[[A_BC1]]" "[[B_BC1]]" + +// LINK-NOT: "*.llvm-link" +// 
LINK-NOT: ".*opt" +// LINK-NOT: ".*llc" +// LINK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV2:.*.out]]" "[[A_BC2]]" "[[B_BC2]]" + +// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" + +// LINK: [[LD:".*ld.*"]] {{.*}} "[[A_OBJ_HOST]]" "[[B_OBJ_HOST]]" +// LINK-SAME: {{.*}} "-T" "{{.*}}.lk" Index: clang/test/Driver/hip-toolchain-rdc.hip =================================================================== --- clang/test/Driver/hip-toolchain-rdc.hip +++ clang/test/Driver/hip-toolchain-rdc.hip @@ -7,7 +7,7 @@ // RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ -// RUN: -fuse-ld=lld -fgpu-rdc \ +// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s @@ -15,70 +15,54 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" 
+// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" // CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] -// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC]] [[B_BC]] -// CHECK-SAME: "-o" [[LINKED_BC_DEV1:".*-gfx803-linked-.*bc"]] - -// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx803" -// CHECK-SAME: "-o" [[OPT_BC_DEV1:".*-gfx803-optimized.*bc"]] - -// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx803" -// CHECK-SAME: "-filetype=obj" -// CHECK-SAME: "-o" [[OBJ_DEV1:".*-gfx803-.*o"]] - -// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared" -// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]] +// CHECK-NOT: "*.llvm-link" +// CHECK-NOT: ".*opt" +// CHECK-NOT: ".*llc" +// CHECK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-o" [[A_BC2:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: 
{{.*}} "-main-file-name" "b.hip" // CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-o" [[B_BC2:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC]] -// CHECK: [[LLVM_LINK]] [[A_BC]] [[B_BC]] -// CHECK-SAME: "-o" [[LINKED_BC_DEV2:".*-gfx900-linked-.*bc"]] - -// CHECK: [[OPT]] [[LINKED_BC_DEV2]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx900" -// CHECK-SAME: "-o" [[OPT_BC_DEV2:".*-gfx900-optimized.*bc"]] - -// CHECK: [[LLC]] [[OPT_BC_DEV2]] "-mtriple=amdgcn-amd-amdhsa" -// CHECk-SAME: "-mcpu=gfx900" -// CHECK-SAME: "-filetype=obj" -// CHECK-SAME: "-o" [[OBJ_DEV2:".*-gfx900-.*o"]] - -// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" -// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[OBJ_DEV2]] +// CHECK-NOT: "*.llvm-link" +// CHECK-NOT: ".*opt" +// CHECK-NOT: ".*llc" +// CHECK: {{".*lld.*"}} {{.*}} "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]] // CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"