diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll --- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll +++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} declare <2 x float> @barv(<2 x float> %input) declare <3 x float> @barv3(<3 x float> %input) diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll --- a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefixes=ALL,SM30 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=ALL,SM60 -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} ; CHECK-LABEL: fadd_double diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll --- a/llvm/test/CodeGen/NVPTX/atomics.ll +++ b/llvm/test/CodeGen/NVPTX/atomics.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_32%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %} ; CHECK-LABEL: atom0 diff --git a/llvm/test/CodeGen/NVPTX/barrier.ll b/llvm/test/CodeGen/NVPTX/barrier.ll --- a/llvm/test/CodeGen/NVPTX/barrier.ll +++ b/llvm/test/CodeGen/NVPTX/barrier.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %} declare void @llvm.nvvm.bar.warp.sync(i32) declare void @llvm.nvvm.barrier.sync(i32) diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.ll b/llvm/test/CodeGen/NVPTX/branch-fold.ll --- a/llvm/test/CodeGen/NVPTX/branch-fold.ll +++ b/llvm/test/CodeGen/NVPTX/branch-fold.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | %ptxas-verify %} ; Disable CGP which also folds branches, so that only BranchFolding is under ; the spotlight. diff --git a/llvm/test/CodeGen/NVPTX/bug21465.ll b/llvm/test/CodeGen/NVPTX/bug21465.ll --- a/llvm/test/CodeGen/NVPTX/bug21465.ll +++ b/llvm/test/CodeGen/NVPTX/bug21465.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -nvptx-lower-args -S | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-unknown-unknown" diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll --- a/llvm/test/CodeGen/NVPTX/bug26185-2.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185-2.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} ; Verify that we correctly emit code for extending ldg/ldu. We do not expose ; extending variants in the backend, but the ldg/ldu selection code may pick diff --git a/llvm/test/CodeGen/NVPTX/bug26185.ll b/llvm/test/CodeGen/NVPTX/bug26185.ll --- a/llvm/test/CodeGen/NVPTX/bug26185.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} ; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit ; registers in the backend, so these loads need special handling. diff --git a/llvm/test/CodeGen/NVPTX/bypass-div.ll b/llvm/test/CodeGen/NVPTX/bypass-div.ll --- a/llvm/test/CodeGen/NVPTX/bypass-div.ll +++ b/llvm/test/CodeGen/NVPTX/bypass-div.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} ; 64-bit divides and rems should be split into a fast and slow path where ; the fast path uses a 32-bit operation. diff --git a/llvm/test/CodeGen/NVPTX/divrem-combine.ll b/llvm/test/CodeGen/NVPTX/divrem-combine.ll --- a/llvm/test/CodeGen/NVPTX/divrem-combine.ll +++ b/llvm/test/CodeGen/NVPTX/divrem-combine.ll @@ -1,7 +1,7 @@ ; RUN: llc -O2 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O2 --check-prefix=CHECK ; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O0 --check-prefix=CHECK -; RUN: %if ptxas %{ llc -O2 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} -; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc -O2 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} +; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} ; The following IR ; diff --git a/llvm/test/CodeGen/NVPTX/extloadv.ll b/llvm/test/CodeGen/NVPTX/extloadv.ll --- a/llvm/test/CodeGen/NVPTX/extloadv.ll +++ b/llvm/test/CodeGen/NVPTX/extloadv.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} define void @foo(ptr nocapture readonly %x_value, ptr nocapture %output) #0 { %1 = load <4 x float>, ptr %x_value, align 16 diff --git a/llvm/test/CodeGen/NVPTX/fns.ll b/llvm/test/CodeGen/NVPTX/fns.ll --- a/llvm/test/CodeGen/NVPTX/fns.ll +++ b/llvm/test/CodeGen/NVPTX/fns.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %} declare i32 @llvm.nvvm.fns(i32, i32, i32) diff --git a/llvm/test/CodeGen/NVPTX/fp-contract.ll b/llvm/test/CodeGen/NVPTX/fp-contract.ll --- a/llvm/test/CodeGen/NVPTX/fp-contract.ll +++ b/llvm/test/CodeGen/NVPTX/fp-contract.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %} target triple = "nvptx64-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll --- a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll +++ b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} ; Check that invariant loads from the global addrspace are lowered to ; ld.global.nc. diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll --- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_32%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %} declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll --- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll +++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck -check-prefix=SM20 %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -check-prefix=SM35 %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-unknown-unknown" diff --git a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll --- a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll +++ b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 | FileCheck %s --check-prefix PTX ; RUN: opt < %s -S -nvptx-lower-aggr-copies | FileCheck %s --check-prefix IR -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 | %ptxas-verify %} ; Verify that the NVPTXLowerAggrCopies pass works as expected - calls to ; llvm.mem* intrinsics get lowered to loops. diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll --- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll +++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-unknown-unknown" diff --git a/llvm/test/CodeGen/NVPTX/managed.ll b/llvm/test/CodeGen/NVPTX/managed.ll --- a/llvm/test/CodeGen/NVPTX/managed.ll +++ b/llvm/test/CodeGen/NVPTX/managed.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx40 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx40 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx40 | %ptxas-verify %} ; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s --check-prefix ERROR ; ERROR: LLVM ERROR: .attribute(.managed) requires PTX version >= 4.0 and sm_30 diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -1,6 +1,6 @@ ; Verifies correctness of load/store of parameters and return values. ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %} %s_i1 = type { i1 } %s_i8 = type { i8 } diff --git a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll --- a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll +++ b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} ; Check load from constant global variables. These loads should be ; ld.global.nc (aka ldg). diff --git a/llvm/test/CodeGen/NVPTX/reg-copy.ll b/llvm/test/CodeGen/NVPTX/reg-copy.ll --- a/llvm/test/CodeGen/NVPTX/reg-copy.ll +++ b/llvm/test/CodeGen/NVPTX/reg-copy.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-unknown-unknown" diff --git a/llvm/test/CodeGen/NVPTX/rotate.ll b/llvm/test/CodeGen/NVPTX/rotate.ll --- a/llvm/test/CodeGen/NVPTX/rotate.ll +++ b/llvm/test/CodeGen/NVPTX/rotate.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s ; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} declare i32 @llvm.nvvm.rotate.b32(i32, i32) diff --git a/llvm/test/CodeGen/NVPTX/shfl-p.ll b/llvm/test/CodeGen/NVPTX/shfl-p.ll --- a/llvm/test/CodeGen/NVPTX/shfl-p.ll +++ b/llvm/test/CodeGen/NVPTX/shfl-p.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %} declare {i32, i1} @llvm.nvvm.shfl.down.i32p(i32, i32, i32) declare {float, i1} @llvm.nvvm.shfl.down.f32p(float, i32, i32) diff --git a/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll b/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll --- a/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll +++ b/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %} declare {i32, i1} @llvm.nvvm.shfl.sync.down.i32p(i32, i32, i32, i32) declare {float, i1} @llvm.nvvm.shfl.sync.down.f32p(i32, float, i32, i32) diff --git a/llvm/test/CodeGen/NVPTX/shfl-sync.ll b/llvm/test/CodeGen/NVPTX/shfl-sync.ll --- a/llvm/test/CodeGen/NVPTX/shfl-sync.ll +++ b/llvm/test/CodeGen/NVPTX/shfl-sync.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %} declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32) declare float @llvm.nvvm.shfl.sync.down.f32(float, i32, i32, i32) diff --git a/llvm/test/CodeGen/NVPTX/shfl.ll b/llvm/test/CodeGen/NVPTX/shfl.ll --- a/llvm/test/CodeGen/NVPTX/shfl.ll +++ b/llvm/test/CodeGen/NVPTX/shfl.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %} declare i32 @llvm.nvvm.shfl.down.i32(i32, i32, i32) declare float @llvm.nvvm.shfl.down.f32(float, i32, i32) diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll --- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 ; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll --- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 ; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 ; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll --- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll +++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 ; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/vote.ll b/llvm/test/CodeGen/NVPTX/vote.ll --- a/llvm/test/CodeGen/NVPTX/vote.ll +++ b/llvm/test/CodeGen/NVPTX/vote.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %} declare i1 @llvm.nvvm.vote.all(i1) ; CHECK-LABEL: .func{{.*}}vote_all diff --git a/llvm/test/CodeGen/NVPTX/zeroext-32bit.ll b/llvm/test/CodeGen/NVPTX/zeroext-32bit.ll --- a/llvm/test/CodeGen/NVPTX/zeroext-32bit.ll +++ b/llvm/test/CodeGen/NVPTX/zeroext-32bit.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} ; The zeroext attribute below should be silently ignored because ; we can pass a 32-bit integer across a function call without diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -203,6 +203,9 @@ print('couldn\'t determine ptxas version') return None +# Enable %ptxas and %ptxas-verify tools. +# %ptxas-verify defaults to sm_60 architecture. It can be overriden +# by specifying required one, for instance: %ptxas-verify -arch=sm_80. def enable_ptxas(ptxas_executable): version = ptxas_version(ptxas_executable) if version: @@ -213,6 +216,7 @@ (9, 0), (9, 1), (9, 2), (10, 0), (10, 1), (10, 2), (11, 0), (11, 1), (11, 2), (11, 3), (11, 4), (11, 5), (11, 6), + (11, 7), (11, 8), (12, 0), ] def version_int(ver): @@ -235,7 +239,7 @@ config.available_features.add('ptxas') tools.extend([ToolSubst('%ptxas', ptxas_executable), - ToolSubst('%ptxas-verify', '{} -c -o /dev/null -'.format( + ToolSubst('%ptxas-verify', '{} -arch=sm_60 -c -'.format( ptxas_executable))]) ptxas_executable = \