Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -2724,6 +2724,12 @@ for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { // Create a link action to link device IR with device library // and generate ISA. + CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction( + C, Args, phases::Backend, CudaDeviceActions[I], + AssociatedOffloadKind); + CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction( + C, Args, phases::Assemble, CudaDeviceActions[I], + AssociatedOffloadKind); ActionList AL; AL.push_back(CudaDeviceActions[I]); CudaDeviceActions[I] = @@ -2779,7 +2785,7 @@ } // By default, we produce an action for each device arch. - if (!Relocatable || CurPhase <= phases::Backend) { + if (!Relocatable || CurPhase < phases::Backend || CompileDeviceOnly) { for (Action *&A : CudaDeviceActions) A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A, AssociatedOffloadKind); Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -243,6 +243,11 @@ if (JA.getType() == types::TY_HIP_FATBIN) return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); + assert(Inputs.size()); + if (Inputs.size() == 1 && Inputs[0].getType() == types::TY_Object) + return constructLldCommand(C, JA, Inputs, Output, Args, + Inputs[0].getFilename()); + assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn && "Unsupported target"); Index: clang/test/Driver/cuda-phases.cu =================================================================== --- clang/test/Driver/cuda-phases.cu +++ clang/test/Driver/cuda-phases.cu @@ -7,71 +7,41 @@ // REQUIRES: clang-driver // REQUIRES: powerpc-registered-target // REQUIRES: nvptx-registered-target -// REQUIRES: amdgpu-registered-target // // Test single gpu architecture with complete compilation. // // Test CUDA NVPTX phases. // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN,BIN_NV %s +// RUN: | FileCheck -check-prefixes=BIN %s // -// Test HIP AMDGPU -fgpu-rdc phases. -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_RDC %s -// -// Test HIP AMDGPU -fno-gpu-rdc phases (default). -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_NRDC %s -// -// BIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) -// BIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) +// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) -// BIN_NV-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]]) -// BIN_AMD-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:gfx803]]) +// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]]) // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) -// BIN_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) -// BIN_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) -// BIN_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object -// BIN_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler -// BIN_NV-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]]) -// BIN_NV-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir -// BIN_NV-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) -// BIN_AMD_RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) -// BIN_AMD_NRDC-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]]) -// BIN_AMD_NRDC-DAG: [[P7:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, image -// BIN_AMD_NRDC-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, hip-fatbin, (device-hip) -// BIN_AMD_NRDC-DAG: [[P11:[0-9]+]]: offload, "host-hip (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P8]]}, ir -// BIN_AMD_NRDC-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) +// BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) +// BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) +// BIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object +// BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler +// BIN-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]]) +// BIN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir +// BIN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) // BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) // BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) -// BIN_AMD_RDC-DAG: [[P15:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]]) -// BIN_AMD_RDC-DAG: [[P16:[0-9]+]]: linker, {[[P15]]}, image, (device-[[T]], [[ARCH]]) -// BIN_AMD_RDC-DAG: [[P17:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P14]]}, -// BIN_AMD_RDC-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P16]]}, object // // Test single gpu architecture up to the assemble phase. // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=ASM,ASM_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=ASM,ASM_AMD %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 -fcuda-rdc %s -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=ASM,ASM_AMD %s -// ASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) -// ASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// RUN: | FileCheck -check-prefixes=ASM %s +// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// ASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) -// ASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler +// ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// ASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]]) // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]]) // ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]]) @@ -82,62 +52,46 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,BIN2_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN2,BIN2_AMD %s -// BIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) -// BIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) +// RUN: | FileCheck -check-prefixes=BIN2 %s +// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) // BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) -// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30|gfx803]]) +// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30]]) // BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) // BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) -// BIN2_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) -// BIN2_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) -// BIN2_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object -// BIN2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler -// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) +// BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) +// BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) +// BIN2-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object +// BIN2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler +// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) // BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) -// BIN2_NV-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) -// BIN2_NV-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) -// BIN2_NV-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object -// BIN2_NV-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler -// BIN2_NV-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]]) -// BIN2_NV-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir -// BIN2_NV-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) -// BIN2_AMD-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) +// BIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object +// BIN2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler +// BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]]) +// BIN2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir +// BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) // BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) // BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) -// BIN2_AMD-DAG: [[P22:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]]) -// BIN2_AMD-DAG: [[P23:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]]) -// BIN2_AMD-DAG: [[P24:[0-9]+]]: linker, {[[P22]]}, image, (device-[[T]], [[ARCH1]]) -// BIN2_AMD-DAG: [[P25:[0-9]+]]: linker, {[[P23]]}, image, (device-[[T]], [[ARCH2]]) -// BIN2_AMD-DAG: [[P26:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P21]]}, -// BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P24]]}, -// BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH2]])" {[[P25]]}, object // // Test two gpu architecturess up to the assemble phase. // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=ASM2,ASM2_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=ASM2,ASM2_AMD %s -// ASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]]) -// ASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]]) +// RUN: | FileCheck -check-prefixes=ASM2 %s +// ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]]) // ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) // ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]]) -// ASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]]) -// ASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P3]]}, assembler -// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) +// ASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]]) +// ASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P3]]}, assembler +// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) // ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) -// ASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) -// ASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler +// ASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) +// ASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler // ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]]) // ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]]) // ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]]) @@ -149,12 +103,8 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \ -// RUN: | FileCheck -check-prefixes=HBIN,HBIN_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \ -// RUN: | FileCheck -check-prefixes=HBIN,HBIN_AMD %s -// HBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) -// HBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) +// RUN: | FileCheck -check-prefixes=HBIN %s +// HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) // HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) @@ -167,12 +117,8 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=HASM,HASM_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=HASM,HASM_AMD %s -// HASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) -// HASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) +// RUN: | FileCheck -check-prefixes=HASM %s +// HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) @@ -184,12 +130,8 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \ -// RUN: | FileCheck -check-prefixes=HBIN2,HBIN2_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \ -// RUN: | FileCheck -check-prefixes=HBIN2,HBIN2_AMD %s -// HBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) -// HBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) +// RUN: | FileCheck -check-prefixes=HBIN2 %s +// HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) // HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) @@ -203,12 +145,8 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \ -// RUN: 2>&1 | FileCheck -check-prefixes=HASM2,HASM2_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \ -// RUN: 2>&1 | FileCheck -check-prefixes=HASM2,HASM2_AMD %s -// HASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) -// HASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]]) +// RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s +// HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) @@ -220,17 +158,13 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \ -// RUN: | FileCheck -check-prefixes=DBIN,DBIN_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \ -// RUN: | FileCheck -check-prefixes=DBIN,DBIN_AMD %s -// DBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) -// DBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// RUN: | FileCheck -check-prefixes=DBIN %s +// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) -// DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) -// DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object +// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object // DBIN-NOT: host // // Test single gpu architecture up to the assemble phase in device-only @@ -238,16 +172,12 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=DASM,DASM_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=DASM,DASM_AMD %s -// DASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) -// DASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// RUN: | FileCheck -check-prefixes=DASM %s +// DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) -// DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler +// DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler // DASM-NOT: host // @@ -256,23 +186,19 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \ -// RUN: | FileCheck -check-prefixes=DBIN2,DBIN2_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \ -// RUN: 2>&1 | FileCheck -check-prefixes=DBIN2,DBIN2_AMD %s -// DBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) -// DBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// RUN: | FileCheck -check-prefixes=DBIN2 %s +// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DBIN2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) -// DBIN2_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) -// DBIN2_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object -// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) +// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object +// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) // DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) -// DBIN2_NV-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]]) -// DBIN2_NV-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]]) -// DBIN2_NV-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object +// DBIN2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object // DBIN2-NOT: host // // Test two gpu architectures up to the assemble phase in device-only @@ -280,20 +206,15 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \ -// RUN: 2>&1 | FileCheck -check-prefixes=DASM2,DASM2_NV %s -// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu \ -// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: --cuda-device-only -S 2>&1 \ -// RUN: | FileCheck -check-prefixes=DASM2,DASM2_AMD %s -// DASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) -// DASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// RUN: 2>&1 | FileCheck -check-prefixes=DASM2 %s +// DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) -// DASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler -// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) +// DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler +// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) -// DASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) -// DASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler +// DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) +// DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler // DASM2-NOT: host Index: clang/test/Driver/hip-binding.hip =================================================================== --- clang/test/Driver/hip-binding.hip +++ clang/test/Driver/hip-binding.hip @@ -2,6 +2,24 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target +// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s +// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ1:.*o]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ1]]"], output: "[[IMG1:.*]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBIN:.*]]" +// NRDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]", "[[FATBIN]]"], output: "{{.*}}" + +// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: -c -fgpu-rdc 2>&1 | FileCheck -check-prefix=RDCS %s +// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC1:.*bc]]" +// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC2:.*bc]]" +// RDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: "[[HOSTOBJ:.*o]]" +// RDCS: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[BC1]]", "[[BC2]]", "[[HOSTOBJ]]"], output: "{{.*}}" + // RUN: touch %t.o // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\ Index: clang/test/Driver/hip-device-compile.hip =================================================================== --- clang/test/Driver/hip-device-compile.hip +++ clang/test/Driver/hip-device-compile.hip @@ -63,10 +63,10 @@ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: 2>&1 | FileCheck -check-prefixes=BUNDLE %s -// BUNDLE: {{"*.clang.*"}} -// BUNDLE: {{"*.llvm-link"}} -// BUNDLE: {{".*opt"}} -// BUNDLE: {{".*llc"}} +// BUNDLE: {{"*.clang.*"}} {{.*}} "-emit-obj" +// BUNDLE-NOT: {{"*.llvm-link"}} +// BUNDLE-NOT: {{".*opt"}} +// BUNDLE-NOT: {{".*llc"}} // BUNDLE: {{".*lld.*"}} // BUNDLE: {{".*clang-offload-bundler"}} Index: clang/test/Driver/hip-phases.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-phases.hip @@ -0,0 +1,214 @@ +// Tests the phases generated for a CUDA offloading target for different +// combinations of: +// - Number of gpu architectures; +// - Host/device-only compilation; +// - User-requested final phase - binary or assembly. + +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target +// +// Test single gpu architecture with complete compilation. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN,NRD %s +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN,RDC %s +// +// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) + +// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]]) +// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) +// NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) +// NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) +// NRD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-hip, [[ARCH]]) +// NRD-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image +// NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-hip) +// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]]) + +// NRD-DAG: [[P12:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P10]]}, ir + +// NRD-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]]) +// RDC-DAG: [[P13:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// BIN-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (host-[[T]]) +// BIN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (host-[[T]]) +// RDC-DAG: [[P16:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P15]]}, "device-hip (amdgcn-amd-amdhsa:gfx803)" {[[P10]]}, image +// +// Test single gpu architecture up to the assemble phase. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 %s -S 2>&1 \ +// RUN: | FileCheck -check-prefixes=ASM %s +// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) + +// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]]) +// ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]]) +// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]]) +// ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]]) + +// +// Test two gpu architectures with complete compilation. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN2 %s + +// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) + +// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]]) +// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) +// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) +// BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) +// BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) +// BIN2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) +// BIN2-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image + +// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) +// BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) +// BIN2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) +// BIN2-DAG: [[P16:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image + +// BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-hip) + +// BIN2-DAG: [[P18:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P17]]}, ir +// BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) +// BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) +// BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) + +// +// Test two gpu architecturess up to the assemble phase. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -S 2>&1 \ +// RUN: | FileCheck -check-prefixes=ASM2 %s +// ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]]) +// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) +// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]]) +// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]]) +// ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) +// ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]]) +// ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]]) +// ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]]) +// ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]]) + +// +// Test single gpu architecture with complete compilation in host-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \ +// RUN: | FileCheck -check-prefixes=HBIN %s +// HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) +// HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) +// HBIN-NOT: device +// +// Test single gpu architecture up to the assemble phase in host-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \ +// RUN: | FileCheck -check-prefixes=HASM %s +// HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HASM-NOT: device + +// +// Test two gpu architectures with complete compilation in host-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \ +// RUN: | FileCheck -check-prefixes=HBIN2 %s +// HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) +// HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) +// HBIN2-NOT: device + +// +// Test two gpu architectures up to the assemble phase in host-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \ +// RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s +// HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]]) +// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) +// HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) +// HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HASM2-NOT: device + +// +// Test single gpu architecture with complete compilation in device-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \ +// RUN: | FileCheck -check-prefixes=DBIN %s +// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DBIN-NOT: host +// +// Test single gpu architecture up to the assemble phase in device-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S 2>&1 \ +// RUN: | FileCheck -check-prefixes=DASM %s +// DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DASM-NOT: host + +// +// Test two gpu architectures with complete compilation in device-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \ +// RUN: 2>&1 | FileCheck -check-prefixes=DBIN2 %s +// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) +// DBIN2-NOT: host +// +// Test two gpu architectures up to the assemble phase in device-only +// compilation mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu \ +// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: --cuda-device-only -S 2>&1 \ +// RUN: | FileCheck -check-prefixes=DASM2 %s +// DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) +// DASM2-NOT: host Index: clang/test/Driver/hip-save-temps.hip =================================================================== --- clang/test/Driver/hip-save-temps.hip +++ clang/test/Driver/hip-save-temps.hip @@ -23,15 +23,22 @@ // RUN: FileCheck -check-prefixes=CHECK,RDC,RDC-WOUT,WOUT %s // CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui" -// CHECK: {{.*}}llvm-link{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900-linked.bc" -// CHECK: {{.*}}opt{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900-optimized.bc" -// CHECK: {{.*}}llc{{.*}}"-filetype=asm"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s" -// CHECK: {{.*}}llc{{.*}}"-filetype=obj"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o" +// NORDC: {{".*clang.*"}} {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc" +// NORDC: {{".*clang.*"}} {{.*}} "-S" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s" +// NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o" +// NORDC-NOT: {{.*}}llvm-link +// NORDC-NOT: {{.*}}opt +// NORDC-NOT: {{.*}}llc +// RDC: {{.*}}llvm-link{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp-linked.bc" +// RDC: {{.*}}opt{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp-optimized.bc" +// RDC: {{.*}}llc{{.*}}"-filetype=asm"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.s" +// RDC: {{.*}}llc{{.*}}"-filetype=obj"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.o" // NORDC: {{.*}}lld{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out" // RDC: {{.*}}lld{{.*}}"-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" // NORDC: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb" // CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui" -// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// NORDC: {{.*}}clang{{.*}}"-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" +// RDC: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc" // CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s" // CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps{{.*}}.o" // RDC-NOUT: {{.*}}clang-offload-bundler{{.*}}"-outputs=a.out.hipfb" Index: clang/test/Driver/hip-toolchain-mllvm.hip =================================================================== --- clang/test/Driver/hip-toolchain-mllvm.hip +++ clang/test/Driver/hip-toolchain-mllvm.hip @@ -9,32 +9,18 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-mllvm" "-amdgpu-function-calls=0" {{.*}} -// CHECK: [[OPT:".*opt"]] {{".*-gfx803-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx803" "-amdgpu-function-calls=0" -// CHECK-SAME: "-o" [[OPT_803_BC:".*-gfx803-optimized.*bc"]] - -// CHECK: [[LLC: ".*llc"]] [[OPT_803_BC]] -// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: {{.*}} "-mcpu=gfx803" -// CHECK-SAME: "-filetype=obj" -// CHECK-SAME: "-amdgpu-function-calls=0" "-o" {{".*-gfx803-.*o"}} +// CHECK-NOT: {{".*opt"}} +// CHECK-NOT: {{".*llc"}} // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-target-cpu" "gfx900" // CHECK-SAME: {{.*}} "-mllvm" "-amdgpu-function-calls=0" {{.*}} -// CHECK: [[OPT]] {{".*-gfx900-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx900" "-amdgpu-function-calls=0" -// CHECK-SAME: "-o" [[OPT_900_BC:".*-gfx900-optimized.*bc"]] - -// CHECK: [[LLC]] [[OPT_900_BC]] -// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: {{.*}} "-mcpu=gfx900" -// CHECk-SAME: "-filetype=obj" -// CHECK-SAME: "-amdgpu-function-calls=0" "-o" {{".*-gfx900-.*o"}} +// CHECK-NOT: {{".*opt"}} +// CHECK-NOT: {{".*llc"}} Index: clang/test/Driver/hip-toolchain-no-rdc.hip =================================================================== --- clang/test/Driver/hip-toolchain-no-rdc.hip +++ clang/test/Driver/hip-toolchain-no-rdc.hip @@ -18,25 +18,17 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[A_BC_803:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_803:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] -// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC_803]] -// CHECK-SAME: "-o" [[LINKED_BC_DEV_A_803:".*-gfx803-linked-.*bc"]] - -// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx803" -// CHECK-SAME: "-o" [[OPT_BC_DEV_A_803:".*-gfx803-optimized.*bc"]] - -// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx803" -// CHECK-SAME: "-filetype=obj" -// CHECK-SAME: "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]] +// CHECK-NOT: {{".*llvm-link"}} +// CHECK-NOT: {{".*opt"}} +// CHECK-NOT: {{".*llc"}} // CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]] @@ -47,25 +39,17 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[A_BC_900:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] -// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC_900]] -// CHECK-SAME: "-o" [[LINKED_BC_DEV_A_900:".*-gfx900-linked-.*bc"]] - -// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx900" -// CHECK-SAME: "-o" [[OPT_BC_DEV_A_900:".*-gfx900-optimized.*bc"]] - -// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx900" -// CHECK-SAME: "-filetype=obj" -// CHECK-SAME: "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]] +// CHECK-NOT: {{".*llvm-link"}} +// CHECK-NOT: {{".*opt"}} +// CHECK-NOT: {{".*llc"}} // CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]] @@ -92,25 +76,17 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[B_BC_803:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_B_803:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] -// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[B_BC_803]] -// CHECK-SAME: "-o" [[LINKED_BC_DEV_B_803:".*-gfx803-linked-.*bc"]] - -// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx803" -// CHECK-SAME: "-o" [[OPT_BC_DEV_B_803:".*-gfx803-optimized.*bc"]] - -// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx803" -// CHECK-SAME: "-filetype=obj" -// CHECK-SAME: "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]] +// CHECK-NOT: {{".*llvm-link"}} +// CHECK-NOT: {{".*opt"}} +// CHECK-NOT: {{".*llc"}} // CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]] @@ -121,25 +97,17 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" -// CHECK-SAME: {{.*}} "-o" [[B_BC_900:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_B_900:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC]] -// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[B_BC_900]] -// CHECK-SAME: "-o" [[LINKED_BC_DEV_B_900:".*-gfx900-linked-.*bc"]] - -// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-mcpu=gfx900" -// CHECK-SAME: "-o" [[OPT_BC_DEV_B_900:".*-gfx900-optimized.*bc"]] - -// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa" -// CHECk-SAME: "-mcpu=gfx900" -// CHECK-SAME: "-filetype=obj" -// CHECK-SAME: "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]] +// CHECK-NOT: {{".*llvm-link"}} +// CHECK-NOT: {{".*opt"}} +// CHECK-NOT: {{".*llc"}} // CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]] Index: clang/test/Driver/hip-toolchain-opt.hip =================================================================== --- clang/test/Driver/hip-toolchain-opt.hip +++ clang/test/Driver/hip-toolchain-opt.hip @@ -68,27 +68,9 @@ // Oz-SAME: "-Oz" // Og-SAME: "-Og" -// ALL: "{{.*}}opt" -// DEFAULT-NOT: "-O{{.}}" -// O0-SAME: "-O0" -// O1-SAME: "-O1" -// O2-SAME: "-O2" -// O3-SAME: "-O3" -// Os-SAME: "-Os" -// Oz-SAME: "-Oz" -// Og-SAME: "-O1" -// ALL-SAME: "-mtriple=amdgcn-amd-amdhsa" +// ALL-NOT: "{{.*}}opt" -// ALL: "{{.*}}llc" -// DEFAULT-NOT: "-O{{.}}" -// O0-SAME: "-O0" -// O1-SAME: "-O1" -// O2-SAME: "-O2" -// O3-SAME: "-O3" -// Os-SAME: "-O2" -// Oz-SAME: "-O2" -// Og-SAME: "-O1" -// ALL-SAME: "-mtriple=amdgcn-amd-amdhsa" +// ALL-NOT: "{{.*}}llc" // ALL: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu" // DEFAULT-NOT: "-O{{.}}"