Index: clang/include/clang/Basic/Cuda.h =================================================================== --- clang/include/clang/Basic/Cuda.h +++ clang/include/clang/Basic/Cuda.h @@ -37,6 +37,7 @@ CudaVersion CudaStringToVersion(const llvm::Twine &S); enum class CudaArch { + UNUSED, UNKNOWN, SM_20, SM_21, Index: clang/lib/Basic/Cuda.cpp =================================================================== --- clang/lib/Basic/Cuda.cpp +++ clang/lib/Basic/Cuda.cpp @@ -92,6 +92,8 @@ #undef GFX const char *CudaArchToString(CudaArch A) { + if (A == CudaArch::UNUSED) + return ""; auto result = std::find_if( std::begin(arch_names), std::end(arch_names), [A](const CudaArchToStringMap &map) { return A == map.arch; }); @@ -101,6 +103,8 @@ } const char *CudaArchToVirtualArchString(CudaArch A) { + if (A == CudaArch::UNUSED) + return ""; auto result = std::find_if( std::begin(arch_names), std::end(arch_names), [A](const CudaArchToStringMap &map) { return A == map.arch; }); Index: clang/lib/Basic/Targets/NVPTX.cpp =================================================================== --- clang/lib/Basic/Targets/NVPTX.cpp +++ clang/lib/Basic/Targets/NVPTX.cpp @@ -204,6 +204,7 @@ case CudaArch::GFX1031: case CudaArch::LAST: break; + case CudaArch::UNUSED: case CudaArch::UNKNOWN: assert(false && "No GPU arch when compiling CUDA device code."); return ""; Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -5034,6 +5034,7 @@ case CudaArch::GFX1012: case CudaArch::GFX1030: case CudaArch::GFX1031: + case CudaArch::UNUSED: case CudaArch::UNKNOWN: break; case CudaArch::LAST: @@ -5095,6 +5096,7 @@ case CudaArch::GFX1012: case CudaArch::GFX1030: case CudaArch::GFX1031: + case CudaArch::UNUSED: case CudaArch::UNKNOWN: break; case CudaArch::LAST: Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -2517,7 +2517,7 @@ // If we have a fat binary, add it to the list. if (CudaFatBinary) { - AddTopLevel(CudaFatBinary, CudaArch::UNKNOWN); + AddTopLevel(CudaFatBinary, CudaArch::UNUSED); CudaDeviceActions.clear(); CudaFatBinary = nullptr; return; Index: clang/test/Driver/hip-phases.hip =================================================================== --- clang/test/Driver/hip-phases.hip +++ clang/test/Driver/hip-phases.hip @@ -219,6 +219,12 @@ // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image +// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, ) +// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin // DBIN-NOT: host // // Test single gpu architecture up to the assemble phase in device-only @@ -230,6 +236,8 @@ // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler // DASM-NOT: host // @@ -242,9 +250,19 @@ // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) -// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) -// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image +// DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image +// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, ) +// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin // DBIN2-NOT: host // // Test two gpu architectures up to the assemble phase in device-only @@ -257,9 +275,13 @@ // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) +// DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) +// DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, assembler // DASM2-NOT: host // Index: clang/test/Driver/hip-toolchain-device-only.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-toolchain-device-only.hip @@ -0,0 +1,29 @@ +// REQUIRES: clang-driver, amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --offload-arch=gfx803 --offload-arch=gfx900 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib -c \ +// RUN: %s 2>&1 | FileCheck -check-prefixes=CHECK,LINK %s + +// CHECK-NOT: error: + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-fcuda-is-device" +// CHECK-SAME: "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_803:".*o"]] "-x" "hip" + +// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]] + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: "-fcuda-is-device" +// CHECK-SAME: "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip" + +// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]] + +// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_A_803]],[[IMG_DEV_A_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]"