Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8256,6 +8256,9 @@ def warn_kern_is_inline : Warning< "ignored 'inline' attribute on kernel function %0">, InGroup; +def err_hip_kern_without_gpu : Error< + "compiling a HIP kernel without specifying an offload arch is not allowed">, + DefaultFatal; def err_variadic_device_fn : Error< "CUDA device code does not support variadic functions">; def err_va_arg_in_device : Error< Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -293,8 +293,11 @@ if (!BoundArch.empty()) { DAL->eraseArg(options::OPT_mcpu_EQ); - DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch); - checkTargetID(*DAL); + if (BoundArch != "unknown") { + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), + BoundArch); + checkTargetID(*DAL); + } } return DAL; @@ -355,7 +358,8 @@ llvm::SmallVector HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { llvm::SmallVector BCLibs; - if (DriverArgs.hasArg(options::OPT_nogpulib)) + StringRef GpuArch = getGPUArch(DriverArgs); + if (DriverArgs.hasArg(options::OPT_nogpulib) || GpuArch.empty()) return {}; ArgStringList LibraryPaths; @@ -386,7 +390,6 @@ getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; return {}; } - StringRef GpuArch = getGPUArch(DriverArgs); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void)GpuArch; auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -4428,6 +4428,11 @@ } S.Diag(Method->getBeginLoc(), diag::warn_kern_is_method) << Method; } + if (S.getASTContext().getTargetInfo().getTargetOpts().CPU.empty() && + S.getLangOpts().HIP && S.getLangOpts().CUDAIsDevice) { + S.Diag(FD->getBeginLoc(), diag::err_hip_kern_without_gpu); + return; + } // Only warn for "inline" when compiling for host, to cut down on noise. if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice) S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD; Index: clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu +++ clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s #include "Inputs/cuda.h" __global__ void hip_kernel_temp() { Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -1,9 +1,15 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \ +// RUN: -fcuda-is-device -emit-llvm -x hip %s -o - \ +// RUN: | FileCheck --check-prefixes=COMMON,CHECK %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \ +// RUN: -fcuda-is-device -emit-llvm -x hip %s \ +// RUN: -disable-O0-optnone -o - | opt -S -O2 \ +// RUN: | FileCheck %s --check-prefixes=COMMON,OPT +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -x hip %s -o - | FileCheck -check-prefix=HOST %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=CHECK,DEFAULT %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -target-cpu gfx906 -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=CHECK,MAX1024 %s // RUN: %clang_cc1 -triple nvptx \ // RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \ Index: clang/test/CodeGenCUDA/kernel-amdgcn.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-amdgcn.cu +++ clang/test/CodeGenCUDA/kernel-amdgcn.cu @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -fcuda-is-device \ +// RUN: -emit-llvm -x hip %s -o - | FileCheck %s #include "Inputs/cuda.h" // CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv Index: clang/test/CodeGenCUDA/kernel-args.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-args.cu +++ clang/test/CodeGenCUDA/kernel-args.cu @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s +// RUN: -target-cpu gfx906 -emit-llvm %s -o - \ +// RUN: | FileCheck -check-prefix=AMDGCN %s // RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/kernel-dbg-info.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-dbg-info.cu +++ clang/test/CodeGenCUDA/kernel-dbg-info.cu @@ -5,7 +5,8 @@ // RUN: -o - -x hip | FileCheck -check-prefixes=CHECK,O0 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ -// RUN: -o - -x hip -fcuda-is-device | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 -o - -x hip -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ @@ -14,7 +15,8 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \ -// RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O3 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ @@ -22,7 +24,8 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \ -// RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/lambda-reference-var.cu =================================================================== --- clang/test/CodeGenCUDA/lambda-reference-var.cu +++ clang/test/CodeGenCUDA/lambda-reference-var.cu @@ -3,7 +3,7 @@ // RUN: | FileCheck -check-prefix=HOST %s // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 | FileCheck -check-prefix=DEV %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/lambda.cu =================================================================== --- clang/test/CodeGenCUDA/lambda.cu +++ clang/test/CodeGenCUDA/lambda.cu @@ -3,7 +3,7 @@ // RUN: | FileCheck -check-prefix=HOST %s // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 | FileCheck -check-prefix=DEV %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/managed-var.cu =================================================================== --- clang/test/CodeGenCUDA/managed-var.cu +++ clang/test/CodeGenCUDA/managed-var.cu @@ -1,11 +1,11 @@ // REQUIRES: x86-registered-target, amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,DEV,NORDC-D %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev +// RUN: -target-cpu gfx906 -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev // RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,RDC-D %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ Index: clang/test/CodeGenCUDA/norecurse.cu =================================================================== --- clang/test/CodeGenCUDA/norecurse.cu +++ clang/test/CodeGenCUDA/norecurse.cu @@ -5,7 +5,8 @@ // RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s +// RUN: -target-cpu gfx906 -emit-llvm -disable-llvm-passes \ +// RUN: -o - -x hip %s | FileCheck %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu =================================================================== --- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -2,7 +2,7 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu =================================================================== --- clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -2,7 +2,7 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=DEV,INT-DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux \ @@ -10,7 +10,7 @@ // RUN: -check-prefixes=HOST,INT-HOST %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev +// RUN: -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev // RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- clang/test/CodeGenCUDA/unnamed-types.cu +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC -// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE +// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE #include "Inputs/cuda.h" Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu =================================================================== --- clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -26,8 +26,11 @@ // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s -// Test no subtarget, which should get the denormal setting of the default gfx803 -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// Test no subtarget, which should get the denormal setting of the default +// CPU of AMDGPU target, which is 'none'. +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c \ +// RUN: -march=haswell -nocudainc -nogpulib %s 2>&1 \ +// RUN: | FileCheck -check-prefix=NOFTZ %s // Test multiple offload archs with different defaults. // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s Index: clang/test/Driver/hip-default-gpu-arch.hip =================================================================== --- clang/test/Driver/hip-default-gpu-arch.hip +++ clang/test/Driver/hip-default-gpu-arch.hip @@ -4,4 +4,5 @@ // RUN: %clang -### -c %s 2>&1 | FileCheck %s -// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx803" +// CHECK-NOT: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" +// CHECK: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa" Index: clang/test/SemaCUDA/kernel-no-gpu.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/kernel-no-gpu.cu @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -fcuda-is-device -verify=hip -x hip %s +// RUN: %clang_cc1 -fcuda-is-device -verify=cuda %s +// cuda-no-diagnostics + +#include "Inputs/cuda.h" + +__global__ void kern1() {} +// hip-error@-1 {{compiling a HIP kernel without specifying an offload arch is not allowed}} + +// Make sure the error is emitted once. +__global__ void kern2() {}