diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2765,7 +2765,7 @@ CudaActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) { - DefaultCudaArch = CudaArch::SM_20; + DefaultCudaArch = CudaArch::SM_35; } StringRef getCanonicalOffloadArch(StringRef ArchStr) override { diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu --- a/clang/test/Driver/cuda-external-tools.cu +++ b/clang/test/Driver/cuda-external-tools.cu @@ -7,96 +7,120 @@ // Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas O3. // RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s // RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT1 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT1 %s // RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s // RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s // RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s // RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s // Generating relocatable device code // RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // With debugging enabled, ptxas should be run with with no ptxas optimizations. // RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -g -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,DBG %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,DBG %s // --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug. // RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s // Regular compile without -O. This should result in us passing -O0 to ptxas. // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s // Regular compiles with -Os and -Oz. For lack of a better option, we map // these to ptxas -O3. // RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s // RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s // Regular compile targeting sm_35. // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s // Separate compilation targeting sm_35. // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // 32-bit compile. // RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s // 32-bit compile when generating relocatable device code. // RUN: %clang -### -target i386-linux-gnu -fgpu-rdc -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s // Compile with -fintegrated-as. This should still cause us to invoke ptxas. // RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s // Check that we still pass -c when generating relocatable device code. // RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // Check -Xcuda-ptxas and -Xcuda-fatbinary // RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,SM20,PTXAS-EXTRA,FATBINARY-EXTRA %s +// RUN: | FileCheck -check-prefixes=CHECK,SM35,PTXAS-EXTRA,FATBINARY-EXTRA %s // MacOS spot-checks // RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s // RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s // RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s // Check relocatable device code generation on MacOS. // RUN: %clang -### -target x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // RUN: %clang -### -target i386-apple-macosx -fgpu-rdc -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s // Check that CLANG forwards the -v flag to PTXAS. -// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s +// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s // Match clang job that produces PTX assembly. // CHECK: "-cc1" // ARCH64-SAME: "-triple" "nvptx64-nvidia-cuda" // ARCH32-SAME: "-triple" "nvptx-nvidia-cuda" -// SM20-SAME: "-target-cpu" "sm_20" // SM35-SAME: "-target-cpu" "sm_35" // RDC-SAME: "-fgpu-rdc" // CHECK-NOT: "-fgpu-rdc" -// SM20-SAME: "-o" "[[PTXFILE:[^"]*]]" // SM35-SAME: "-o" "[[PTXFILE:[^"]*]]" // Match the call to ptxas (which assembles PTX to SASS). @@ -112,9 +136,7 @@ // OPT3-SAME: "-O3" // OPT3-NOT: "-g" // DBG-SAME: "-g" "--dont-merge-basicblocks" "--return-at-end" -// SM20-SAME: "--gpu-name" "sm_20" // SM35-SAME: "--gpu-name" "sm_35" -// SM20-SAME: "--output-file" "[[CUBINFILE:[^"]*]]" // SM35-SAME: "--output-file" "[[CUBINFILE:[^"]*]]" // CHECK-SAME: "[[PTXFILE]]" // PTXAS-EXTRA-SAME: "-foo1" @@ -129,9 +151,7 @@ // ARCH64-SAME-DAG: "-64" // ARCH32-SAME-DAG: "-32" // CHECK-DAG: "--create" "[[FATBINARY:[^"]*]]" -// SM20-SAME-DAG: "--image=profile=compute_20,file=[[PTXFILE]]" // SM35-SAME-DAG: "--image=profile=compute_35,file=[[PTXFILE]]" -// SM20-SAME-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]" // SM35-SAME-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]" // FATBINARY-EXTRA-SAME: "-bar1" // FATBINARY-EXTRA-SAME: "-bar2" diff --git a/clang/test/Driver/cuda-march.cu b/clang/test/Driver/cuda-march.cu --- a/clang/test/Driver/cuda-march.cu +++ b/clang/test/Driver/cuda-march.cu @@ -9,14 +9,14 @@ // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c \ // RUN: -march=haswell %s 2>&1 | FileCheck %s // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c \ -// RUN: -march=haswell --cuda-gpu-arch=sm_20 %s 2>&1 | FileCheck %s +// RUN: -march=haswell --cuda-gpu-arch=sm_35 %s 2>&1 | FileCheck %s // CHECK: {{.*}}clang{{.*}}" "-cc1" // CHECK-SAME: "-triple" "nvptx -// CHECK-SAME: "-target-cpu" "sm_20" +// CHECK-SAME: "-target-cpu" "sm_35" // CHECK: ptxas -// CHECK-SAME: "--gpu-name" "sm_20" +// CHECK-SAME: "--gpu-name" "sm_35" // CHECK: {{.*}}clang{{.*}}" "-cc1" // CHECK-SAME: "-target-cpu" "haswell" diff --git a/clang/test/Driver/cuda-options-freebsd.cu b/clang/test/Driver/cuda-options-freebsd.cu deleted file mode 100644 --- a/clang/test/Driver/cuda-options-freebsd.cu +++ /dev/null @@ -1,289 +0,0 @@ -// Tests CUDA compilation pipeline construction in Driver. -// REQUIRES: clang-driver -// REQUIRES: x86-registered-target -// REQUIRES: nvptx-registered-target - -// Simple compilation case. Compile device-side to PTX assembly and make sure -// we use it on the host side. -// RUN: %clang -### -target x86_64-unknown-freebsd -c %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ -// RUN: -check-prefix NOLINK %s - -// Typical compilation + link case. -// RUN: %clang -### -target x86_64-unknown-freebsd %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ -// RUN: -check-prefix LINK %s - -// Verify that --cuda-host-only disables device-side compilation, but doesn't -// disable host-side compilation/linking. -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-host-only %s 2>&1 \ -// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ -// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s - -// Verify that --cuda-device-only disables host-side compilation and linking. -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix NOHOST -check-prefix NOLINK %s - -// Check that the last of --cuda-compile-host-device, --cuda-host-only, and -// --cuda-device-only wins. - -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-host-only %s 2>&1 \ -// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ -// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s - -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-compile-host-device \ -// RUN: --cuda-host-only %s 2>&1 \ -// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ -// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s - -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-host-only \ -// RUN: --cuda-device-only %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix NOHOST -check-prefix NOLINK %s - -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-compile-host-device \ -// RUN: --cuda-device-only %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix NOHOST -check-prefix NOLINK %s - -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-host-only \ -// RUN: --cuda-compile-host-device %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ -// RUN: -check-prefix LINK %s - -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-compile-host-device %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ -// RUN: -check-prefix LINK %s - -// Verify that --cuda-gpu-arch option passes the correct GPU architecture to -// device compilation. -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-gpu-arch=sm_30 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix DEVICE-SM30 -check-prefix HOST \ -// RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s - -// Verify that there is one device-side compilation per --cuda-gpu-arch args -// and that all results are included on the host side. -// RUN: %clang -### -target x86_64-unknown-freebsd \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \ -// RUN: -check-prefixes DEVICE-SM30,DEVICE2-SM35 \ -// RUN: -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \ -// RUN: -check-prefixes HOST,HOST-NOSAVE,NOLINK %s - -// Verify that device-side results are passed to the correct tool when -// -save-temps is used. -// RUN: %clang -### -target x86_64-unknown-freebsd -save-temps -c %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \ -// RUN: -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s - -// Verify that device-side results are passed to the correct tool when -// -fno-integrated-as is used. -// RUN: %clang -### -target x86_64-unknown-freebsd -fno-integrated-as -c %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix HOST -check-prefix HOST-NOSAVE \ -// RUN: -check-prefix HOST-AS -check-prefix NOLINK %s - -// Verify that --[no-]cuda-gpu-arch arguments are handled correctly. -// a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-gpu-arch=sm_35 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s - -// b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-gpu-arch=sm_35 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s - -// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X -// we default to sm_20 -- same as if no --cuda-gpu-arch were passed. -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s - -// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30\ -// RUN: --no-cuda-gpu-arch=sm_50 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s - -// e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s - -// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-gpu-arch=all \ -// RUN: --cuda-gpu-arch=sm_35 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s - -// g) There's no --cuda-gpu-arch=all -// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \ -// RUN: --cuda-gpu-arch=all \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefix ARCHALLERROR %s - - -// Verify that --[no-]cuda-include-ptx arguments are handled correctly. -// a) by default we're including PTX for all GPUs. -// RUN: %clang -### -target x86_64-unknown-freebsd \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s - -// b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs -// RUN: %clang -### -target x86_64-unknown-freebsd \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-include-ptx=all \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s - -// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only. -// RUN: %clang -### -target x86_64-unknown-freebsd \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-include-ptx=sm_35 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s -// RUN: %clang -### -target x86_64-unknown-freebsd \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-include-ptx=sm_30 \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s - -// d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all -// RUN: %clang -### -target x86_64-unknown-freebsd \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-include-ptx=all --cuda-include-ptx=all \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s - -// e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX -// RUN: %clang -### -target x86_64-unknown-freebsd \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \ -// RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s - - -// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" -// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" -// ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30" -// NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30" -// ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35" -// NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35" -// ARCHALLERROR: error: unsupported CUDA gpu architecture: all - -// Match device-side preprocessor and compiler phases with -save-temps. -// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda" -// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-freebsd" -// DEVICE-SAVE-SAME: "-fcuda-is-device" -// DEVICE-SAVE-SAME: "-x" "cuda" - -// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda" -// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-freebsd" -// DEVICE-SAVE-SAME: "-fcuda-is-device" -// DEVICE-SAVE-SAME: "-x" "cuda-cpp-output" - -// Match the job that produces PTX assembly. -// DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda" -// DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-freebsd" -// DEVICE-SAME: "-fcuda-is-device" -// DEVICE-SM30-SAME: "-target-cpu" "sm_30" -// DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]" -// DEVICE-NOSAVE-SAME: "-x" "cuda" -// DEVICE-SAVE-SAME: "-x" "ir" - -// Match the call to ptxas (which assembles PTX to SASS). -// DEVICE:ptxas -// DEVICE-SM30-DAG: "--gpu-name" "sm_30" -// DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]" -// DEVICE-DAG: "[[PTXFILE]]" - -// Match another device-side compilation. -// DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda" -// DEVICE2-SAME: "-aux-triple" "x86_64-unknown-freebsd" -// DEVICE2-SAME: "-fcuda-is-device" -// DEVICE2-SM35-SAME: "-target-cpu" "sm_35" -// DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]" -// DEVICE2-SAME: "-x" "cuda" - -// Match another call to ptxas. -// DEVICE2: ptxas -// DEVICE2-SM35-DAG: "--gpu-name" "sm_35" -// DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]" -// DEVICE2-DAG: "[[PTXFILE2]]" - -// Match no device-side compilation. -// NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda" -// NODEVICE-NOT: "-fcuda-is-device" - -// INCLUDES-DEVICE:fatbinary -// INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]" -// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]" -// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]" -// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]" -// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]" - -// Match host-side preprocessor job with -save-temps. -// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-freebsd" -// HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda" -// HOST-SAVE-NOT: "-fcuda-is-device" -// HOST-SAVE-SAME: "-x" "cuda" - -// Match host-side compilation. -// HOST: "-cc1" "-triple" "x86_64-unknown-freebsd" -// HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda" -// HOST-NOT: "-fcuda-is-device" -// There is only one GPU binary after combining it with fatbinary! -// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" -// INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]" -// There is only one GPU binary after combining it with fatbinary. -// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" -// HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" -// HOST-NOSAVE-SAME: "-x" "cuda" -// HOST-SAVE-SAME: "-x" "cuda-cpp-output" - -// Match external assembler that uses compilation output. -// HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]" - -// Match no GPU code inclusion. -// NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary" - -// Match no host compilation. -// NOHOST-NOT: "-cc1" "-triple" -// NOHOST-NOT: "-x" "cuda" - -// Match linker. -// LINK: "{{.*}}{{ld|link}}{{(.exe)?}}" -// LINK-SAME: "[[HOSTOUTPUT]]" - -// Match no linker. -// NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}" - -// FATBIN-COMMON:fatbinary -// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]" -// FATBIN-COMMON: "--image=profile=sm_30,file= -// PTX-SM30: "--image=profile=compute_30,file= -// NOPTX-SM30-NOT: "--image=profile=compute_30,file= -// FATBIN-COMMON: "--image=profile=sm_35,file= -// PTX-SM35: "--image=profile=compute_35,file= -// NOPTX-SM35-NOT: "--image=profile=compute_35,file= diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -94,47 +94,47 @@ // Verify that --[no-]cuda-gpu-arch arguments are handled correctly. // a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-gpu-arch=sm_35 \ +// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-gpu-arch=sm_50 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s +// RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,NOARCH-SM50 %s // b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-gpu-arch=sm_35 \ +// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-gpu-arch=sm_50 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s +// RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,NOARCH-SM50 %s // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X -// we default to sm_20 -- same as if no --cuda-gpu-arch were passed. +// we default to sm_35 -- same as if no --cuda-gpu-arch were passed. // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ -// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \ +// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-gpu-arch=sm_50 --no-cuda-gpu-arch=sm_30 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s +// RUN: | FileCheck -check-prefixes NOARCH-SM30,ARCH-SM35,NOARCH-SM50 %s // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30\ // RUN: --no-cuda-gpu-arch=sm_50 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s +// RUN: | FileCheck -check-prefixes ARCH-SM30,ARCH-SM35,NOARCH-SM50 %s // e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ -// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \ -// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-gpu-arch=sm_50 --no-cuda-gpu-arch=sm_30 \ +// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s +// RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,ARCH-SM50 %s // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ -// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \ +// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ // RUN: --no-cuda-gpu-arch=all \ -// RUN: --cuda-gpu-arch=sm_35 \ +// RUN: --cuda-gpu-arch=sm_50 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s +// RUN: | FileCheck -check-prefixes NOARCH-SM30,NOARCH-SM35,ARCH-SM50 %s // g) There's no --cuda-gpu-arch=all // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ @@ -190,12 +190,12 @@ // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' -// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" -// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" // ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30" // NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30" // ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35" // NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35" +// ARCH-SM50: "-cc1"{{.*}}"-target-cpu" "sm_50" +// NOARCH-SM50-NOT: "-cc1"{{.*}}"-target-cpu" "sm_50" // ARCHALLERROR: error: unsupported CUDA gpu architecture: all // Match device-side preprocessor and compiler phases with -save-temps. diff --git a/clang/test/Driver/cuda-ptxas-path.cu b/clang/test/Driver/cuda-ptxas-path.cu --- a/clang/test/Driver/cuda-ptxas-path.cu +++ b/clang/test/Driver/cuda-ptxas-path.cu @@ -9,4 +9,4 @@ // CHECK-NOT: "ptxas" // CHECK: "/some/path/to/ptxas" -// CHECK-SAME: "--gpu-name" "sm_20" +// CHECK-SAME: "--gpu-name" "sm_35" diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu --- a/clang/test/Driver/lto.cu +++ b/clang/test/Driver/lto.cu @@ -16,13 +16,13 @@ // CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}lto.cu", cuda, (host-cuda) // CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output // CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda) -// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}lto.cu", cuda, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {7}, object -// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {6}, assembler +// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}lto.cu", cuda, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {7}, object +// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {6}, assembler // CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda) // CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir // CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda) diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c --- a/clang/test/Driver/openmp-offload.c +++ b/clang/test/Driver/openmp-offload.c @@ -215,13 +215,13 @@ // CHK-PHASES-WITH-CUDA: 0: input, "[[INPUT:.+\.c]]", cuda, (host-cuda-openmp) // CHK-PHASES-WITH-CUDA: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda-openmp) // CHK-PHASES-WITH-CUDA: 2: compiler, {1}, ir, (host-cuda-openmp) -// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_20) -// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20) -// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_20) -// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_20) -// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_20) -// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object -// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler +// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_{{.*}}) +// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_{{.*}}) +// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_{{.*}}) +// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_{{.*}}) +// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_{{.*}}) +// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_{{.*}})" {7}, object +// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_{{.*}})" {6}, assembler // CHK-PHASES-WITH-CUDA: 10: linker, {8, 9}, cuda-fatbin, (device-cuda) // CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir // CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp) diff --git a/clang/test/Driver/opt-record.c b/clang/test/Driver/opt-record.c --- a/clang/test/Driver/opt-record.c +++ b/clang/test/Driver/opt-record.c @@ -24,7 +24,7 @@ // CHECK-NO-O: "-cc1" // CHECK-NO-O-DAG: "-opt-record-file" "opt-record.opt.yaml" -// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_20.opt.yaml" +// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_{{.*}}.opt.yaml" // CHECK-EQ: "-cc1" // CHECK-EQ: "-opt-record-file" "BAR.txt" diff --git a/clang/test/Driver/thinlto.cu b/clang/test/Driver/thinlto.cu --- a/clang/test/Driver/thinlto.cu +++ b/clang/test/Driver/thinlto.cu @@ -16,13 +16,13 @@ // CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}thinlto.cu", cuda, (host-cuda) // CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output // CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda) -// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}thinlto.cu", cuda, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20) -// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {7}, object -// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {6}, assembler +// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}thinlto.cu", cuda, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_{{.*}}) +// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {7}, object +// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {6}, assembler // CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda) // CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir // CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)