diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll --- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll +++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX ; RUN: opt -mtriple=nvptx-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR ; RUN: opt -mtriple=nvptx64-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4 diff --git a/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll b/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll --- a/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll +++ b/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=COMMON,NOCARRY -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes=COMMON,CARRY -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=COMMON,NOCARRY +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes=COMMON,CARRY +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll --- a/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll +++ b/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: .visible .global .align 4 .u32 g = 42; ; CHECK: .visible .global .align 1 .b8 ga[4] = {0, 1, 2, 3}; -; CHECK: .visible .global .align 4 .u32 g2 = generic(g); -; CHECK: .visible .global .align 4 .u32 g3 = g; -; CHECK: .visible .global .align 8 .u32 g4[2] = {0, generic(g)}; -; CHECK: .visible .global .align 8 .u32 g5[2] = {0, generic(g)+8}; +; CHECK: .visible .global .align 8 .u64 g2 = generic(g); +; CHECK: .visible .global .align 8 .u64 g3 = g; +; CHECK: .visible .global .align 8 .u64 g4[2] = {0, generic(g)}; +; CHECK: .visible .global .align 8 .u64 g5[2] = {0, generic(g)+8}; @g = addrspace(1) global i32 42 @ga = addrspace(1) global [4 x i8] c"\00\01\02\03" @@ -15,34 +15,34 @@ @g4 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) @g to ptr)} @g5 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) getelementptr (i32, ptr addrspace(1) @g, i32 2) to ptr)} -; CHECK: .visible .global .align 4 .u32 g6 = generic(ga)+2; +; CHECK: .visible .global .align 8 .u64 g6 = generic(ga)+2; @g6 = addrspace(1) global ptr getelementptr inbounds ( [4 x i8], ptr addrspacecast (ptr addrspace(1) @ga to ptr), i32 0, i32 2 ) -; CHECK: .visible .global .align 4 .u32 g7 = generic(g); +; CHECK: .visible .global .align 8 .u64 g7 = generic(g); @g7 = addrspace(1) global ptr addrspacecast ( ptr addrspace(1) @g to ptr ) -; CHECK: .visible .global .align 4 .u32 g8[2] = {0, g}; +; CHECK: .visible .global .align 8 .u64 g8[2] = {0, g}; @g8 = addrspace(1) global [2 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @g] -; CHECK: .visible .global .align 4 .u32 g9[2] = {0, generic(g)}; +; CHECK: .visible .global .align 8 .u64 g9[2] = {0, generic(g)}; @g9 = addrspace(1) global [2 x ptr] [ ptr null, ptr addrspacecast (ptr addrspace(1) @g to ptr) ] -; CHECK: .visible .global .align 4 .u32 g10[2] = {0, g}; +; CHECK: .visible .global .align 8 .u64 g10[2] = {0, g}; @g10 = addrspace(1) global [2 x ptr addrspace(1)] [ ptr addrspace(1) null, ptr addrspace(1) @g ] -; CHECK: .visible .global .align 4 .u32 g11[2] = {0, generic(g)}; +; CHECK: .visible .global .align 8 .u64 g11[2] = {0, generic(g)}; @g11 = addrspace(1) global [2 x ptr] [ ptr null, ptr addrspacecast (ptr addrspace(1) @g to ptr) diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast.ll b/llvm/test/CodeGen/NVPTX/addrspacecast.ll --- a/llvm/test/CodeGen/NVPTX/addrspacecast.ll +++ b/llvm/test/CodeGen/NVPTX/addrspacecast.ll @@ -1,7 +1,7 @@ ; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32,G32 ; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64,G64 ; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr| FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64,G64 -; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/aggr-param.ll b/llvm/test/CodeGen/NVPTX/aggr-param.ll --- a/llvm/test/CodeGen/NVPTX/aggr-param.ll +++ b/llvm/test/CodeGen/NVPTX/aggr-param.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; Make sure aggregate param types get emitted properly. diff --git a/llvm/test/CodeGen/NVPTX/annotations.ll b/llvm/test/CodeGen/NVPTX/annotations.ll --- a/llvm/test/CodeGen/NVPTX/annotations.ll +++ b/llvm/test/CodeGen/NVPTX/annotations.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} @texture = internal addrspace(1) global i64 0, align 8 diff --git a/llvm/test/CodeGen/NVPTX/arg-lowering.ll b/llvm/test/CodeGen/NVPTX/arg-lowering.ll --- a/llvm/test/CodeGen/NVPTX/arg-lowering.ll +++ b/llvm/test/CodeGen/NVPTX/arg-lowering.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) foo0( ; CHECK: .param .align 4 .b8 foo0_param_0[8] diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll --- a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll +++ b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} ;; These tests should run for all targets diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-int.ll b/llvm/test/CodeGen/NVPTX/arithmetic-int.ll --- a/llvm/test/CodeGen/NVPTX/arithmetic-int.ll +++ b/llvm/test/CodeGen/NVPTX/arithmetic-int.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ;; These tests should run for all targets diff --git a/llvm/test/CodeGen/NVPTX/async-copy.ll b/llvm/test/CodeGen/NVPTX/async-copy.ll --- a/llvm/test/CodeGen/NVPTX/async-copy.ll +++ b/llvm/test/CodeGen/NVPTX/async-copy.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX32 %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX64 %s -; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %} +; RUN: %if ptxas-11.0 && ! ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %} ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %} declare void @llvm.nvvm.cp.async.wait.group(i32) diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll --- a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} ; CHECK-LABEL: .func test( diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll --- a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} ; CHECK-LABEL: .func test_atomics_scope( diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll --- a/llvm/test/CodeGen/NVPTX/atomics.ll +++ b/llvm/test/CodeGen/NVPTX/atomics.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_32 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_32 | %ptxas-verify %} ; CHECK-LABEL: atom0 diff --git a/llvm/test/CodeGen/NVPTX/bf16.ll b/llvm/test/CodeGen/NVPTX/bf16.ll --- a/llvm/test/CodeGen/NVPTX/bf16.ll +++ b/llvm/test/CodeGen/NVPTX/bf16.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} ; LDST: .b8 bfloat_array[8] = {1, 2, 3, 4, 5, 6, 7, 8}; @"bfloat_array" = addrspace(1) constant [4 x bfloat] @@ -7,8 +7,8 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK: st.global.b16 [{{%rd[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in store bfloat %val, ptr addrspace(1) %out ret void @@ -16,8 +16,8 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_bfloat -; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK: st.global.b16 [{{%rd[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in %val_int = bitcast bfloat %val to i16 store i16 %val_int, ptr addrspace(1) %out @@ -26,8 +26,8 @@ define void @test_bitcast_to_bfloat(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; CHECK-LABEL: @test_bitcast_to_bfloat -; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]] +; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK: st.global.u16 [{{%rd[0-9]+}}], [[TMP]] %val = load i16, ptr addrspace(1) %in %val_fp = bitcast i16 %val to bfloat store bfloat %val_fp, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/NVPTX/bfe.ll b/llvm/test/CodeGen/NVPTX/bfe.ll --- a/llvm/test/CodeGen/NVPTX/bfe.ll +++ b/llvm/test/CodeGen/NVPTX/bfe.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: bfe0 diff --git a/llvm/test/CodeGen/NVPTX/bug17709.ll b/llvm/test/CodeGen/NVPTX/bug17709.ll --- a/llvm/test/CodeGen/NVPTX/bug17709.ll +++ b/llvm/test/CodeGen/NVPTX/bug17709.ll @@ -1,27 +1,27 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} - -; ModuleID = '__kernelgen_main_module' -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -define private ptx_device { double, double } @__utils1_MOD_trace(ptr noalias %m) { -entry: - ;unreachable - %t0 = insertvalue {double, double} undef, double 1.0, 0 - %t1 = insertvalue {double, double} %t0, double 1.0, 1 - ret { double, double } %t1 -} - -%struct.descriptor_dimension.0.52 = type { i64, i64, i64 } -%"struct.array2_complex(kind=8).37.18.70" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] } -%"struct.array2_complex(kind=8).43.5.57" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] } -@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096 - -; CHECK: .visible .entry __kernelgen_main -define ptx_kernel void @__kernelgen_main(ptr nocapture %args, ptr) { -entry: - %1 = tail call ptx_device { double, double } @__utils1_MOD_trace(ptr noalias @replacementOfAlloca8) - ret void -} - +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} + +; ModuleID = '__kernelgen_main_module' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +define private ptx_device { double, double } @__utils1_MOD_trace(ptr noalias %m) { +entry: + ;unreachable + %t0 = insertvalue {double, double} undef, double 1.0, 0 + %t1 = insertvalue {double, double} %t0, double 1.0, 1 + ret { double, double } %t1 +} + +%struct.descriptor_dimension.0.52 = type { i64, i64, i64 } +%"struct.array2_complex(kind=8).37.18.70" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] } +%"struct.array2_complex(kind=8).43.5.57" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] } +@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096 + +; CHECK: .visible .entry __kernelgen_main +define ptx_kernel void @__kernelgen_main(ptr nocapture %args, ptr) { +entry: + %1 = tail call ptx_device { double, double } @__utils1_MOD_trace(ptr noalias @replacementOfAlloca8) + ret void +} + diff --git a/llvm/test/CodeGen/NVPTX/bug22246.ll b/llvm/test/CodeGen/NVPTX/bug22246.ll --- a/llvm/test/CodeGen/NVPTX/bug22246.ll +++ b/llvm/test/CodeGen/NVPTX/bug22246.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/bug22322.ll b/llvm/test/CodeGen/NVPTX/bug22322.ll --- a/llvm/test/CodeGen/NVPTX/bug22322.ll +++ b/llvm/test/CodeGen/NVPTX/bug22322.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll --- a/llvm/test/CodeGen/NVPTX/bug26185-2.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185-2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} ; Verify that we correctly emit code for extending ldg/ldu. We do not expose ; extending variants in the backend, but the ldg/ldu selection code may pick diff --git a/llvm/test/CodeGen/NVPTX/bug26185.ll b/llvm/test/CodeGen/NVPTX/bug26185.ll --- a/llvm/test/CodeGen/NVPTX/bug26185.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} ; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit ; registers in the backend, so these loads need special handling. diff --git a/llvm/test/CodeGen/NVPTX/bug52623.ll b/llvm/test/CodeGen/NVPTX/bug52623.ll --- a/llvm/test/CodeGen/NVPTX/bug52623.ll +++ b/llvm/test/CodeGen/NVPTX/bug52623.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -verify-machineinstrs -; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -verify-machineinstrs +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} ; Check that llc will not crash even when first MBB doesn't contain ; any instruction. diff --git a/llvm/test/CodeGen/NVPTX/bypass-div.ll b/llvm/test/CodeGen/NVPTX/bypass-div.ll --- a/llvm/test/CodeGen/NVPTX/bypass-div.ll +++ b/llvm/test/CodeGen/NVPTX/bypass-div.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} ; 64-bit divides and rems should be split into a fast and slow path where ; the fast path uses a 32-bit operation. diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_50 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_50 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_50 -verify-machineinstrs | %ptxas-verify %} ; calls with a bitcasted function symbol should be fine, but in combination with ; a byval attribute were causing a segfault during isel. This testcase was diff --git a/llvm/test/CodeGen/NVPTX/callchain.ll b/llvm/test/CodeGen/NVPTX/callchain.ll --- a/llvm/test/CodeGen/NVPTX/callchain.ll +++ b/llvm/test/CodeGen/NVPTX/callchain.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target triple = "nvptx" diff --git a/llvm/test/CodeGen/NVPTX/calling-conv.ll b/llvm/test/CodeGen/NVPTX/calling-conv.ll --- a/llvm/test/CodeGen/NVPTX/calling-conv.ll +++ b/llvm/test/CodeGen/NVPTX/calling-conv.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/calls-with-phi.ll b/llvm/test/CodeGen/NVPTX/calls-with-phi.ll --- a/llvm/test/CodeGen/NVPTX/calls-with-phi.ll +++ b/llvm/test/CodeGen/NVPTX/calls-with-phi.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 2>&1 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} ; Make sure the example doesn't crash with segfault diff --git a/llvm/test/CodeGen/NVPTX/combine-min-max.ll b/llvm/test/CodeGen/NVPTX/combine-min-max.ll --- a/llvm/test/CodeGen/NVPTX/combine-min-max.ll +++ b/llvm/test/CodeGen/NVPTX/combine-min-max.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O2 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O2 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -O2 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -O2 | %ptxas-verify %} ; ************************************* ; * Cases with no min/max diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll --- a/llvm/test/CodeGen/NVPTX/compare-int.ll +++ b/llvm/test/CodeGen/NVPTX/compare-int.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ;; These tests should run for all targets diff --git a/llvm/test/CodeGen/NVPTX/constant-vectors.ll b/llvm/test/CodeGen/NVPTX/constant-vectors.ll --- a/llvm/test/CodeGen/NVPTX/constant-vectors.ll +++ b/llvm/test/CodeGen/NVPTX/constant-vectors.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target triple = "nvptx-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/convert-fp.ll b/llvm/test/CodeGen/NVPTX/convert-fp.ll --- a/llvm/test/CodeGen/NVPTX/convert-fp.ll +++ b/llvm/test/CodeGen/NVPTX/convert-fp.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} define i16 @cvt_u16_f32(float %x) { diff --git a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll --- a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll +++ b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/ctlz.ll b/llvm/test/CodeGen/NVPTX/ctlz.ll --- a/llvm/test/CodeGen/NVPTX/ctlz.ll +++ b/llvm/test/CodeGen/NVPTX/ctlz.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/ctpop.ll b/llvm/test/CodeGen/NVPTX/ctpop.ll --- a/llvm/test/CodeGen/NVPTX/ctpop.ll +++ b/llvm/test/CodeGen/NVPTX/ctpop.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/cttz.ll b/llvm/test/CodeGen/NVPTX/cttz.ll --- a/llvm/test/CodeGen/NVPTX/cttz.ll +++ b/llvm/test/CodeGen/NVPTX/cttz.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/disable-opt.ll b/llvm/test/CodeGen/NVPTX/disable-opt.ll --- a/llvm/test/CodeGen/NVPTX/disable-opt.ll +++ b/llvm/test/CodeGen/NVPTX/disable-opt.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O0 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -O0 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -O0 | %ptxas-verify %} define void @foo(ptr %output) { ; CHECK-LABEL: .visible .func foo( diff --git a/llvm/test/CodeGen/NVPTX/div-ri.ll b/llvm/test/CodeGen/NVPTX/div-ri.ll --- a/llvm/test/CodeGen/NVPTX/div-ri.ll +++ b/llvm/test/CodeGen/NVPTX/div-ri.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-prec-divf32=0 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-prec-divf32=0 | %ptxas-verify %} define float @foo(float %a) { ; CHECK: div.approx.f32 diff --git a/llvm/test/CodeGen/NVPTX/divrem-combine.ll b/llvm/test/CodeGen/NVPTX/divrem-combine.ll --- a/llvm/test/CodeGen/NVPTX/divrem-combine.ll +++ b/llvm/test/CodeGen/NVPTX/divrem-combine.ll @@ -1,7 +1,7 @@ -; RUN: llc -O2 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O2 --check-prefix=CHECK -; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O0 --check-prefix=CHECK -; RUN: %if ptxas %{ llc -O2 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} -; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} +; RUN: llc -O2 < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix=O2 --check-prefix=CHECK +; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix=O0 --check-prefix=CHECK +; RUN: %if ptxas %{ llc -O2 < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} +; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} ; The following IR ; diff --git a/llvm/test/CodeGen/NVPTX/envreg.ll b/llvm/test/CodeGen/NVPTX/envreg.ll --- a/llvm/test/CodeGen/NVPTX/envreg.ll +++ b/llvm/test/CodeGen/NVPTX/envreg.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} declare i32 @llvm.nvvm.read.ptx.sreg.envreg0() diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll --- a/llvm/test/CodeGen/NVPTX/fast-math.ll +++ b/llvm/test/CodeGen/NVPTX/fast-math.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} declare float @llvm.sqrt.f32(float) declare double @llvm.sqrt.f64(double) diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll --- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll +++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | %ptxas-verify %} define ptx_device float @t1_f32(float %x, float %y, float %z, float %u, float %v) { diff --git a/llvm/test/CodeGen/NVPTX/fma-disable.ll b/llvm/test/CodeGen/NVPTX/fma-disable.ll --- a/llvm/test/CodeGen/NVPTX/fma-disable.ll +++ b/llvm/test/CodeGen/NVPTX/fma-disable.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll --- a/llvm/test/CodeGen/NVPTX/fma.ll +++ b/llvm/test/CodeGen/NVPTX/fma.ll @@ -1,43 +1,43 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | %ptxas-verify %} - -declare float @dummy_f32(float, float) #0 -declare double @dummy_f64(double, double) #0 - -define ptx_device float @t1_f32(float %x, float %y, float %z) { -; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fmul float %x, %y - %b = fadd float %a, %z - ret float %b -} - -define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) { -; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fmul float %x, %y - %b = fadd float %a, %z - %c = fadd float %a, %w - %d = call float @dummy_f32(float %b, float %c) - ret float %d -} - -define ptx_device double @t1_f64(double %x, double %y, double %z) { -; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fmul double %x, %y - %b = fadd double %a, %z - ret double %b -} - -define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) { -; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fmul double %x, %y - %b = fadd double %a, %z - %c = fadd double %a, %w - %d = call double @dummy_f64(double %b, double %c) - ret double %d -} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | %ptxas-verify %} + +declare float @dummy_f32(float, float) #0 +declare double @dummy_f64(double, double) #0 + +define ptx_device float @t1_f32(float %x, float %y, float %z) { +; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fmul float %x, %y + %b = fadd float %a, %z + ret float %b +} + +define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) { +; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fmul float %x, %y + %b = fadd float %a, %z + %c = fadd float %a, %w + %d = call float @dummy_f32(float %b, float %c) + ret float %d +} + +define ptx_device double @t1_f64(double %x, double %y, double %z) { +; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fmul double %x, %y + %b = fadd double %a, %z + ret double %b +} + +define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) { +; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fmul double %x, %y + %b = fadd double %a, %z + %c = fadd double %a, %w + %d = call double @dummy_f64(double %b, double %c) + ret double %d +} diff --git a/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll b/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll --- a/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -march=nvptx -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK -; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} -; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} +; RUN: llc < %s -march=nvptx64 | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} +; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} ; ---- minimum ---- diff --git a/llvm/test/CodeGen/NVPTX/fp-literals.ll b/llvm/test/CodeGen/NVPTX/fp-literals.ll --- a/llvm/test/CodeGen/NVPTX/fp-literals.ll +++ b/llvm/test/CodeGen/NVPTX/fp-literals.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/fp16.ll b/llvm/test/CodeGen/NVPTX/fp16.ll --- a/llvm/test/CodeGen/NVPTX/fp16.ll +++ b/llvm/test/CodeGen/NVPTX/fp16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=nvptx -verify-machineinstrs < %s | FileCheck %s -; RUN: %if ptxas %{ llc -march=nvptx -verify-machineinstrs < %s | %ptxas-verify %} +; RUN: llc -march=nvptx64 -verify-machineinstrs < %s | FileCheck %s +; RUN: %if ptxas %{ llc -march=nvptx64 -verify-machineinstrs < %s | %ptxas-verify %} declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone diff --git a/llvm/test/CodeGen/NVPTX/function-align.ll b/llvm/test/CodeGen/NVPTX/function-align.ll --- a/llvm/test/CodeGen/NVPTX/function-align.ll +++ b/llvm/test/CodeGen/NVPTX/function-align.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK-NOT: .align 2 define ptx_device void @foo() align 2 { diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll --- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll +++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/global-addrspace.ll b/llvm/test/CodeGen/NVPTX/global-addrspace.ll --- a/llvm/test/CodeGen/NVPTX/global-addrspace.ll +++ b/llvm/test/CodeGen/NVPTX/global-addrspace.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; PTX32: .visible .global .align 4 .u32 i; diff --git a/llvm/test/CodeGen/NVPTX/global-ordering.ll b/llvm/test/CodeGen/NVPTX/global-ordering.ll --- a/llvm/test/CodeGen/NVPTX/global-ordering.ll +++ b/llvm/test/CodeGen/NVPTX/global-ordering.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; Make sure we emit these globals in def-use order diff --git a/llvm/test/CodeGen/NVPTX/global-visibility.ll b/llvm/test/CodeGen/NVPTX/global-visibility.ll --- a/llvm/test/CodeGen/NVPTX/global-visibility.ll +++ b/llvm/test/CodeGen/NVPTX/global-visibility.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; PTX does not support .hidden or .protected. ; Make sure we do not emit them. diff --git a/llvm/test/CodeGen/NVPTX/globals_init.ll b/llvm/test/CodeGen/NVPTX/globals_init.ll --- a/llvm/test/CodeGen/NVPTX/globals_init.ll +++ b/llvm/test/CodeGen/NVPTX/globals_init.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; Make sure the globals constant initializers are not prone to host endianess ; issues. diff --git a/llvm/test/CodeGen/NVPTX/globals_lowering.ll b/llvm/test/CodeGen/NVPTX/globals_lowering.ll --- a/llvm/test/CodeGen/NVPTX/globals_lowering.ll +++ b/llvm/test/CodeGen/NVPTX/globals_lowering.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -relocation-model=static | FileCheck %s --check-prefix CHK -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -relocation-model=static | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -relocation-model=static | FileCheck %s --check-prefix CHK +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -relocation-model=static | %ptxas-verify %} %MyStruct = type { i32, i32, float } @Gbl = internal addrspace(3) global [1024 x %MyStruct] zeroinitializer diff --git a/llvm/test/CodeGen/NVPTX/half.ll b/llvm/test/CodeGen/NVPTX/half.ll --- a/llvm/test/CodeGen/NVPTX/half.ll +++ b/llvm/test/CodeGen/NVPTX/half.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} ; CHECK: .b8 half_array[8] = {1, 2, 3, 4, 5, 6, 7, 8}; @"half_array" = addrspace(1) constant [4 x half] @@ -7,8 +7,8 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK: st.global.b16 [{{%rd[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in store half %val, ptr addrspace(1) %out ret void @@ -16,8 +16,8 @@ define void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_half -; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK: st.global.b16 [{{%rd[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in %val_int = bitcast half %val to i16 store i16 %val_int, ptr addrspace(1) %out @@ -26,8 +26,8 @@ define void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; CHECK-LABEL: @test_bitcast_to_half -; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]] +; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK: st.global.u16 [{{%rd[0-9]+}}], [[TMP]] %val = load i16, ptr addrspace(1) %in %val_fp = bitcast i16 %val to half store half %val_fp, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/NVPTX/i1-global.ll b/llvm/test/CodeGen/NVPTX/i1-global.ll --- a/llvm/test/CodeGen/NVPTX/i1-global.ll +++ b/llvm/test/CodeGen/NVPTX/i1-global.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll b/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll --- a/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll +++ b/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK-LABEL: foo ; CHECK: setp diff --git a/llvm/test/CodeGen/NVPTX/i1-param.ll b/llvm/test/CodeGen/NVPTX/i1-param.ll --- a/llvm/test/CodeGen/NVPTX/i1-param.ll +++ b/llvm/test/CodeGen/NVPTX/i1-param.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx-nvidia-cuda" @@ -8,7 +8,7 @@ ; CHECK: .entry foo ; CHECK: .param .u8 foo_param_0 -; CHECK: .param .u32 foo_param_1 +; CHECK: .param .u64 foo_param_1 define void @foo(i1 %p, ptr %out) { %val = zext i1 %p to i32 store i32 %val, ptr %out diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll --- a/llvm/test/CodeGen/NVPTX/i128-param.ll +++ b/llvm/test/CodeGen/NVPTX/i128-param.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK-LABEL: .visible .func callee( ; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16], diff --git a/llvm/test/CodeGen/NVPTX/i8-param.ll b/llvm/test/CodeGen/NVPTX/i8-param.ll --- a/llvm/test/CodeGen/NVPTX/i8-param.ll +++ b/llvm/test/CodeGen/NVPTX/i8-param.ll @@ -1,24 +1,24 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - -; CHECK: .visible .func (.param .b32 func_retval0) callee -define i8 @callee(i8 %a) { -; CHECK: ld.param.u8 - %ret = add i8 %a, 42 -; CHECK: st.param.b32 - ret i8 %ret -} - -; CHECK: .visible .func caller -define void @caller(ptr %a) { -; CHECK: ld.u8 - %val = load i8, ptr %a - %ret = tail call i8 @callee(i8 %val) -; CHECK: ld.param.b32 - store i8 %ret, ptr %a - ret void -} - - +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +; CHECK: .visible .func (.param .b32 func_retval0) callee +define i8 @callee(i8 %a) { +; CHECK: ld.param.u8 + %ret = add i8 %a, 42 +; CHECK: st.param.b32 + ret i8 %ret +} + +; CHECK: .visible .func caller +define void @caller(ptr %a) { +; CHECK: ld.u8 + %val = load i8, ptr %a + %ret = tail call i8 @callee(i8 %val) +; CHECK: ld.param.b32 + store i8 %ret, ptr %a + ret void +} + + diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll --- a/llvm/test/CodeGen/NVPTX/idioms.ll +++ b/llvm/test/CodeGen/NVPTX/idioms.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} %struct.S16 = type { i16, i16 } diff --git a/llvm/test/CodeGen/NVPTX/imad.ll b/llvm/test/CodeGen/NVPTX/imad.ll --- a/llvm/test/CodeGen/NVPTX/imad.ll +++ b/llvm/test/CodeGen/NVPTX/imad.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: imad define i32 @imad(i32 %a, i32 %b, i32 %c) { diff --git a/llvm/test/CodeGen/NVPTX/inline-asm.ll b/llvm/test/CodeGen/NVPTX/inline-asm.ll --- a/llvm/test/CodeGen/NVPTX/inline-asm.ll +++ b/llvm/test/CodeGen/NVPTX/inline-asm.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} define float @test(float %x) { entry: diff --git a/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll b/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll --- a/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll +++ b/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=nvptx < %s | FileCheck %s -; RUN: %if ptxas %{ llc -march=nvptx < %s | %ptxas-verify %} +; RUN: llc -march=nvptx64 < %s | FileCheck %s +; RUN: %if ptxas %{ llc -march=nvptx64 < %s | %ptxas-verify %} ; Test that %c works with immediates ; CHECK-LABEL: test_inlineasm_c_output_template0 diff --git a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll --- a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll +++ b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll @@ -10,7 +10,7 @@ ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \ ; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \ ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} define ptx_device i32 @test_tid_x() { diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll --- a/llvm/test/CodeGen/NVPTX/intrinsics.ll +++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK-LABEL: test_fabsf( diff --git a/llvm/test/CodeGen/NVPTX/isspacep.ll b/llvm/test/CodeGen/NVPTX/isspacep.ll --- a/llvm/test/CodeGen/NVPTX/isspacep.ll +++ b/llvm/test/CodeGen/NVPTX/isspacep.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline diff --git a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll --- a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll +++ b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/ld-generic.ll b/llvm/test/CodeGen/NVPTX/ld-generic.ll --- a/llvm/test/CodeGen/NVPTX/ld-generic.ll +++ b/llvm/test/CodeGen/NVPTX/ld-generic.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py --- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py +++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py @@ -2,10 +2,10 @@ # LLVM generates correct PTX for them. # RUN: %python %s > %t.ll -# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll # RUN: llc < %t.ll -march=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll +# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll +# RUN: %if ptxas && !ptxas-12.0 %{ llc < %t.ll -march=nvptx -mcpu=sm_30 | %ptxas-verify %} # RUN: %if ptxas %{ llc < %t.ll -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %} -# RUN: %if ptxas %{ llc < %t.ll -march=nvptx -mcpu=sm_30 | %ptxas-verify %} from __future__ import print_function diff --git a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll --- a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll +++ b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} declare <4 x float> @bar() ; CHECK-LABEL: .func foo( define void @foo(ptr %ptr) { -; CHECK: ld.param.u32 %[[PTR:r[0-9]+]], [foo_param_0]; +; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [foo_param_0]; ; CHECK: ld.param.v4.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]], [[E2:%f[0-9]+]], [[E3:%f[0-9]+]]}, [retval0+0]; ; CHECK: st.v4.f32 [%[[PTR]]], {[[E0]], [[E1]], [[E2]], [[E3]]} %val = tail call <4 x float> @bar() diff --git a/llvm/test/CodeGen/NVPTX/ldu-i8.ll b/llvm/test/CodeGen/NVPTX/ldu-i8.ll --- a/llvm/test/CodeGen/NVPTX/ldu-i8.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-i8.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll --- a/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @reg_plus_offset(ptr %a) { -; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32]; -; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36]; +; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}+32]; +; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}+36]; %p2 = getelementptr i32, ptr %a, i32 8 %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr %p2, i32 4) %p3 = getelementptr i32, ptr %a, i32 9 diff --git a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll --- a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll +++ b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 2>&1 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} ; Allow to make libcalls that are defined in the current module diff --git a/llvm/test/CodeGen/NVPTX/load-sext-i1.ll b/llvm/test/CodeGen/NVPTX/load-sext-i1.ll --- a/llvm/test/CodeGen/NVPTX/load-sext-i1.ll +++ b/llvm/test/CodeGen/NVPTX/load-sext-i1.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll --- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll +++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX64 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; Ensure we access the local stack properly diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK64 ; RUN: llc < %s -mtriple nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK32 +; RUN: llc < %s -mtriple nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK64 +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -mtriple nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} %struct.ham = type { [4 x i32] } diff --git a/llvm/test/CodeGen/NVPTX/machine-sink.ll b/llvm/test/CodeGen/NVPTX/machine-sink.ll --- a/llvm/test/CodeGen/NVPTX/machine-sink.ll +++ b/llvm/test/CodeGen/NVPTX/machine-sink.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/managed.ll b/llvm/test/CodeGen/NVPTX/managed.ll --- a/llvm/test/CodeGen/NVPTX/managed.ll +++ b/llvm/test/CodeGen/NVPTX/managed.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx40 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx40 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx40 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx40 | %ptxas-verify %} -; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s --check-prefix ERROR +; RUN: not --crash llc < %s -march=nvptx64 -mcpu=sm_20 2>&1 | FileCheck %s --check-prefix ERROR ; ERROR: LLVM ERROR: .attribute(.managed) requires PTX version >= 4.0 and sm_30 ; CHECK: .visible .global .align 4 .u32 device_g; @@ -11,7 +11,7 @@ ; CHECK: .extern .global .align 4 .u32 decl_g; @decl_g = external addrspace(1) global i32, align 4 -; CHECK: .extern .global .attribute(.managed) .align 8 .b32 managed_decl_g; +; CHECK: .extern .global .attribute(.managed) .align 8 .b64 managed_decl_g; @managed_decl_g = external addrspace(1) global ptr, align 8 !nvvm.annotations = !{!0, !1} diff --git a/llvm/test/CodeGen/NVPTX/mbarrier.ll b/llvm/test/CodeGen/NVPTX/mbarrier.ll --- a/llvm/test/CodeGen/NVPTX/mbarrier.ll +++ b/llvm/test/CodeGen/NVPTX/mbarrier.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX64 -; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} +; RUN: %if ptxas-11.0 && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} declare void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b) diff --git a/llvm/test/CodeGen/NVPTX/minmax-negative.ll b/llvm/test/CodeGen/NVPTX/minmax-negative.ll --- a/llvm/test/CodeGen/NVPTX/minmax-negative.ll +++ b/llvm/test/CodeGen/NVPTX/minmax-negative.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -O0 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -O0 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -O0 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -O0 | %ptxas-verify %} define i16 @test1(ptr %sur1) { ; CHECK-NOT: mov.u16 %rs{{[0-9]+}}, 32767 diff --git a/llvm/test/CodeGen/NVPTX/module-inline-asm.ll b/llvm/test/CodeGen/NVPTX/module-inline-asm.ll --- a/llvm/test/CodeGen/NVPTX/module-inline-asm.ll +++ b/llvm/test/CodeGen/NVPTX/module-inline-asm.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/mulwide.ll b/llvm/test/CodeGen/NVPTX/mulwide.ll --- a/llvm/test/CodeGen/NVPTX/mulwide.ll +++ b/llvm/test/CodeGen/NVPTX/mulwide.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O3 | FileCheck %s --check-prefix=OPT -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s --check-prefix=NOOPT -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O3 | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O0 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -O3 | FileCheck %s --check-prefix=OPT +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -O0 | FileCheck %s --check-prefix=NOOPT +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -O3 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -O0 | %ptxas-verify %} ; OPT-LABEL: @mulwide16 ; NOOPT-LABEL: @mulwide16 diff --git a/llvm/test/CodeGen/NVPTX/named-barriers.ll b/llvm/test/CodeGen/NVPTX/named-barriers.ll --- a/llvm/test/CodeGen/NVPTX/named-barriers.ll +++ b/llvm/test/CodeGen/NVPTX/named-barriers.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; Use bar.sync to arrive at a pre-computed barrier number and diff --git a/llvm/test/CodeGen/NVPTX/nofunc.ll b/llvm/test/CodeGen/NVPTX/nofunc.ll --- a/llvm/test/CodeGen/NVPTX/nofunc.ll +++ b/llvm/test/CodeGen/NVPTX/nofunc.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; Test that we don't crash if we're compiling a module with function references, diff --git a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll --- a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll +++ b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll @@ -1,13 +1,13 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target triple = "nvptx-unknown-nvcl" define void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) { ; The parameter alignment is determined by the align attribute (default 1). ; CHECK-LABEL: .entry foo( -; CHECK: .param .u32 .ptr .align 32 foo_param_2 -; CHECK: .param .u32 .ptr .align 1 foo_param_3 +; CHECK: .param .u64 .ptr .align 32 foo_param_2 +; CHECK: .param .u64 .ptr .align 1 foo_param_3 ret void } diff --git a/llvm/test/CodeGen/NVPTX/nvvm-annotations-D120129.ll b/llvm/test/CodeGen/NVPTX/nvvm-annotations-D120129.ll --- a/llvm/test/CodeGen/NVPTX/nvvm-annotations-D120129.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-annotations-D120129.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx-unknown-unknown | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %} ; ; NVPTXTargetLowering::getFunctionParamOptimizedAlign, which was introduces in ; D120129, contained a poorly designed assertion checking that a function with diff --git a/llvm/test/CodeGen/NVPTX/packed-aggr.ll b/llvm/test/CodeGen/NVPTX/packed-aggr.ll --- a/llvm/test/CodeGen/NVPTX/packed-aggr.ll +++ b/llvm/test/CodeGen/NVPTX/packed-aggr.ll @@ -5,7 +5,7 @@ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK64 -; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %} +; RUN: %if ptxas-11.1 && !ptxas-12.0%{ llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %} ; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %} ;; Test that packed structs with symbol references are represented using the diff --git a/llvm/test/CodeGen/NVPTX/param-align.ll b/llvm/test/CodeGen/NVPTX/param-align.ll --- a/llvm/test/CodeGen/NVPTX/param-align.ll +++ b/llvm/test/CodeGen/NVPTX/param-align.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,NOALIGN4 -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-force-min-byval-param-align | FileCheck %s --check-prefixes=CHECK,ALIGN4 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-force-min-byval-param-align | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,NOALIGN4 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-force-min-byval-param-align | FileCheck %s --check-prefixes=CHECK,ALIGN4 +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-force-min-byval-param-align | %ptxas-verify %} ;;; Need 4-byte alignment on ptr passed byval define ptx_device void @t1(ptr byval(float) %x) { diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll --- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx-unknown-unknown | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %} ; ; Check that parameters of a __device__ function with private or internal ; linkage called from a __global__ (kernel) function get increased alignment, @@ -81,7 +81,7 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x1( ; CHECK: .param .align 4 .b8 caller_St4x1_param_0[4], - ; CHECK: .param .b32 caller_St4x1_param_1 + ; CHECK: .param .b64 caller_St4x1_param_1 ; CHECK: ) ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; @@ -113,7 +113,7 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x2( ; CHECK: .param .align 4 .b8 caller_St4x2_param_0[8], - ; CHECK: .param .b32 caller_St4x2_param_1 + ; CHECK: .param .b64 caller_St4x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[8]; ; CHECK: st.param.v2.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; @@ -154,7 +154,7 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x3( ; CHECK: .param .align 4 .b8 caller_St4x3_param_0[12], - ; CHECK: .param .b32 caller_St4x3_param_1 + ; CHECK: .param .b64 caller_St4x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[12]; ; CHECK: st.param.v2.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; @@ -202,7 +202,7 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x4( ; CHECK: .param .align 4 .b8 caller_St4x4_param_0[16], - ; CHECK: .param .b32 caller_St4x4_param_1 + ; CHECK: .param .b64 caller_St4x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; @@ -252,7 +252,7 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x5( ; CHECK: .param .align 4 .b8 caller_St4x5_param_0[20], - ; CHECK: .param .b32 caller_St4x5_param_1 + ; CHECK: .param .b64 caller_St4x5_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[20]; ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; @@ -312,7 +312,7 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x6( ; CHECK: .param .align 4 .b8 caller_St4x6_param_0[24], - ; CHECK: .param .b32 caller_St4x6_param_1 + ; CHECK: .param .b64 caller_St4x6_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; @@ -378,7 +378,7 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x7( ; CHECK: .param .align 4 .b8 caller_St4x7_param_0[28], - ; CHECK: .param .b32 caller_St4x7_param_1 + ; CHECK: .param .b64 caller_St4x7_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[28]; ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; @@ -454,7 +454,7 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St4x8( ; CHECK: .param .align 4 .b8 caller_St4x8_param_0[32], - ; CHECK: .param .b32 caller_St4x8_param_1 + ; CHECK: .param .b64 caller_St4x8_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; @@ -532,7 +532,7 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St8x1( ; CHECK: .param .align 8 .b8 caller_St8x1_param_0[8], - ; CHECK: .param .b32 caller_St8x1_param_1 + ; CHECK: .param .b64 caller_St8x1_param_1 ; CHECK: ) ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; @@ -564,7 +564,7 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St8x2( ; CHECK: .param .align 8 .b8 caller_St8x2_param_0[16], - ; CHECK: .param .b32 caller_St8x2_param_1 + ; CHECK: .param .b64 caller_St8x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; @@ -602,7 +602,7 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St8x3( ; CHECK: .param .align 8 .b8 caller_St8x3_param_0[24], - ; CHECK: .param .b32 caller_St8x3_param_1 + ; CHECK: .param .b64 caller_St8x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; @@ -650,7 +650,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func caller_St8x4( ; CHECK: .param .align 8 .b8 caller_St8x4_param_0[32], - ; CHECK: .param .b32 caller_St8x4_param_1 + ; CHECK: .param .b64 caller_St8x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-kernel.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-kernel.ll --- a/llvm/test/CodeGen/NVPTX/param-vectorize-kernel.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-kernel.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx-unknown-unknown | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %} ; ; Check that parameters of a __global__ (kernel) function do not get increased ; alignment, and no additional vectorization is performed on loads/stores with @@ -63,9 +63,9 @@ define dso_local void @foo_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x1( ; CHECK: .param .align 4 .b8 foo_St4x1_param_0[4], - ; CHECK: .param .b32 foo_St4x1_param_1 + ; CHECK: .param .b64 foo_St4x1_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x1_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x1_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ret; @@ -77,9 +77,9 @@ define dso_local void @foo_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x2( ; CHECK: .param .align 4 .b8 foo_St4x2_param_0[8], - ; CHECK: .param .b32 foo_St4x2_param_1 + ; CHECK: .param .b64 foo_St4x2_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x2_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x2_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4]; @@ -97,9 +97,9 @@ define dso_local void @foo_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x3( ; CHECK: .param .align 4 .b8 foo_St4x3_param_0[12], - ; CHECK: .param .b32 foo_St4x3_param_1 + ; CHECK: .param .b64 foo_St4x3_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x3_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x3_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4]; @@ -123,9 +123,9 @@ define dso_local void @foo_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x4( ; CHECK: .param .align 4 .b8 foo_St4x4_param_0[16], - ; CHECK: .param .b32 foo_St4x4_param_1 + ; CHECK: .param .b64 foo_St4x4_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x4_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x4_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4]; @@ -155,9 +155,9 @@ define dso_local void @foo_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x5( ; CHECK: .param .align 4 .b8 foo_St4x5_param_0[20], - ; CHECK: .param .b32 foo_St4x5_param_1 + ; CHECK: .param .b64 foo_St4x5_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x5_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x5_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4]; @@ -193,9 +193,9 @@ define dso_local void @foo_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x6( ; CHECK: .param .align 4 .b8 foo_St4x6_param_0[24], - ; CHECK: .param .b32 foo_St4x6_param_1 + ; CHECK: .param .b64 foo_St4x6_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x6_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x6_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4]; @@ -237,9 +237,9 @@ define dso_local void @foo_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x7( ; CHECK: .param .align 4 .b8 foo_St4x7_param_0[28], - ; CHECK: .param .b32 foo_St4x7_param_1 + ; CHECK: .param .b64 foo_St4x7_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x7_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x7_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4]; @@ -287,9 +287,9 @@ define dso_local void @foo_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St4x8( ; CHECK: .param .align 4 .b8 foo_St4x8_param_0[32], - ; CHECK: .param .b32 foo_St4x8_param_1 + ; CHECK: .param .b64 foo_St4x8_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x8_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x8_param_1]; ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0]; ; CHECK: st.u32 [[[R1]]], [[R2]]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4]; @@ -343,9 +343,9 @@ define dso_local void @foo_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St8x1( ; CHECK: .param .align 8 .b8 foo_St8x1_param_0[8], - ; CHECK: .param .b32 foo_St8x1_param_1 + ; CHECK: .param .b64 foo_St8x1_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x1_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x1_param_1]; ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0]; ; CHECK: st.u64 [[[R1]]], [[RD1]]; ; CHECK: ret; @@ -357,9 +357,9 @@ define dso_local void @foo_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St8x2( ; CHECK: .param .align 8 .b8 foo_St8x2_param_0[16], - ; CHECK: .param .b32 foo_St8x2_param_1 + ; CHECK: .param .b64 foo_St8x2_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x2_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x2_param_1]; ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0]; ; CHECK: st.u64 [[[R1]]], [[RD1]]; ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8]; @@ -377,9 +377,9 @@ define dso_local void @foo_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St8x3( ; CHECK: .param .align 8 .b8 foo_St8x3_param_0[24], - ; CHECK: .param .b32 foo_St8x3_param_1 + ; CHECK: .param .b64 foo_St8x3_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x3_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x3_param_1]; ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0]; ; CHECK: st.u64 [[[R1]]], [[RD1]]; ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8]; @@ -403,9 +403,9 @@ define dso_local void @foo_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) { ; CHECK-LABEL: .visible .func foo_St8x4( ; CHECK: .param .align 8 .b8 foo_St8x4_param_0[32], - ; CHECK: .param .b32 foo_St8x4_param_1 + ; CHECK: .param .b64 foo_St8x4_param_1 ; CHECK: ) - ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x4_param_1]; + ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x4_param_1]; ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0]; ; CHECK: st.u64 [[[R1]]], [[RD1]]; ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8]; diff --git a/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll b/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll --- a/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll +++ b/llvm/test/CodeGen/NVPTX/pow2_mask_cmp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=nvptx -verify-machineinstrs < %s | FileCheck %s -; RUN: %if ptxas %{ llc -march=nvptx -verify-machineinstrs < %s | %ptxas-verify %} +; RUN: llc -march=nvptx64 -verify-machineinstrs < %s | FileCheck %s +; RUN: %if ptxas %{ llc -march=nvptx64 -verify-machineinstrs < %s | %ptxas-verify %} ; Tests the following pattern: ; (X & 8) != 0 --> (X & 8) >> 3 diff --git a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll --- a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll +++ b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} define ptx_kernel void @t1(ptr %a) { diff --git a/llvm/test/CodeGen/NVPTX/pr16278.ll b/llvm/test/CodeGen/NVPTX/pr16278.ll --- a/llvm/test/CodeGen/NVPTX/pr16278.ll +++ b/llvm/test/CodeGen/NVPTX/pr16278.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} @one_f = addrspace(4) global float 1.000000e+00, align 4 diff --git a/llvm/test/CodeGen/NVPTX/pr17529.ll b/llvm/test/CodeGen/NVPTX/pr17529.ll --- a/llvm/test/CodeGen/NVPTX/pr17529.ll +++ b/llvm/test/CodeGen/NVPTX/pr17529.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll --- a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll +++ b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} ; Check load from constant global variables. These loads should be ; ld.global.nc (aka ldg). diff --git a/llvm/test/CodeGen/NVPTX/refl1.ll b/llvm/test/CodeGen/NVPTX/refl1.ll --- a/llvm/test/CodeGen/NVPTX/refl1.ll +++ b/llvm/test/CodeGen/NVPTX/refl1.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target triple = "nvptx-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/reg-types.ll b/llvm/test/CodeGen/NVPTX/reg-types.ll --- a/llvm/test/CodeGen/NVPTX/reg-types.ll +++ b/llvm/test/CodeGen/NVPTX/reg-types.ll @@ -3,7 +3,7 @@ ; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=NO8BIT ; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=NO8BIT -; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK-LABEL: .visible .func func() diff --git a/llvm/test/CodeGen/NVPTX/rotate.ll b/llvm/test/CodeGen/NVPTX/rotate.ll --- a/llvm/test/CodeGen/NVPTX/rotate.ll +++ b/llvm/test/CodeGen/NVPTX/rotate.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s -; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} declare i32 @llvm.nvvm.rotate.b32(i32, i32) diff --git a/llvm/test/CodeGen/NVPTX/rotate_64.ll b/llvm/test/CodeGen/NVPTX/rotate_64.ll --- a/llvm/test/CodeGen/NVPTX/rotate_64.ll +++ b/llvm/test/CodeGen/NVPTX/rotate_64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} declare i64 @llvm.nvvm.rotate.b64(i64, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) diff --git a/llvm/test/CodeGen/NVPTX/sched1.ll b/llvm/test/CodeGen/NVPTX/sched1.ll --- a/llvm/test/CodeGen/NVPTX/sched1.ll +++ b/llvm/test/CodeGen/NVPTX/sched1.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; Ensure source scheduling is working diff --git a/llvm/test/CodeGen/NVPTX/sched2.ll b/llvm/test/CodeGen/NVPTX/sched2.ll --- a/llvm/test/CodeGen/NVPTX/sched2.ll +++ b/llvm/test/CodeGen/NVPTX/sched2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} define void @foo(ptr %a) { ; CHECK: .func foo diff --git a/llvm/test/CodeGen/NVPTX/sext-in-reg.ll b/llvm/test/CodeGen/NVPTX/sext-in-reg.ll --- a/llvm/test/CodeGen/NVPTX/sext-in-reg.ll +++ b/llvm/test/CodeGen/NVPTX/sext-in-reg.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/sext-params.ll b/llvm/test/CodeGen/NVPTX/sext-params.ll --- a/llvm/test/CodeGen/NVPTX/sext-params.ll +++ b/llvm/test/CodeGen/NVPTX/sext-params.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/shift-parts.ll b/llvm/test/CodeGen/NVPTX/shift-parts.ll --- a/llvm/test/CodeGen/NVPTX/shift-parts.ll +++ b/llvm/test/CodeGen/NVPTX/shift-parts.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: shift_parts_left_128 define void @shift_parts_left_128(ptr %val, ptr %amtptr) { diff --git a/llvm/test/CodeGen/NVPTX/short-ptr.ll b/llvm/test/CodeGen/NVPTX/short-ptr.ll --- a/llvm/test/CodeGen/NVPTX/short-ptr.ll +++ b/llvm/test/CodeGen/NVPTX/short-ptr.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix CHECK-DEFAULT-32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | FileCheck %s --check-prefixes CHECK-SHORT-SHARED,CHECK-SHORT-CONST,CHECK-SHORT-LOCAL -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/simple-call.ll b/llvm/test/CodeGen/NVPTX/simple-call.ll --- a/llvm/test/CodeGen/NVPTX/simple-call.ll +++ b/llvm/test/CodeGen/NVPTX/simple-call.ll @@ -1,26 +1,26 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} - -; CHECK: .func ({{.*}}) device_func -define float @device_func(float %a) noinline { - %ret = fmul float %a, %a - ret float %ret -} - -; CHECK: .entry kernel_func -define void @kernel_func(ptr %a) { - %val = load float, ptr %a -; CHECK: call.uni (retval0), -; CHECK: device_func, - %mul = call float @device_func(float %val) - store float %mul, ptr %a - ret void -} - - - -!nvvm.annotations = !{!1} - -!1 = !{ptr @kernel_func, !"kernel", i32 1} +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} + +; CHECK: .func ({{.*}}) device_func +define float @device_func(float %a) noinline { + %ret = fmul float %a, %a + ret float %ret +} + +; CHECK: .entry kernel_func +define void @kernel_func(ptr %a) { + %val = load float, ptr %a +; CHECK: call.uni (retval0), +; CHECK: device_func, + %mul = call float @device_func(float %val) + store float %mul, ptr %a + ret void +} + + + +!nvvm.annotations = !{!1} + +!1 = !{ptr @kernel_func, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll --- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll +++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \ ; RUN: | FileCheck %s ; RUN: %if ptxas %{ \ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \ ; RUN: | %ptxas-verify \ ; RUN: %} diff --git a/llvm/test/CodeGen/NVPTX/st-addrspace.ll b/llvm/test/CodeGen/NVPTX/st-addrspace.ll --- a/llvm/test/CodeGen/NVPTX/st-addrspace.ll +++ b/llvm/test/CodeGen/NVPTX/st-addrspace.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %} diff --git a/llvm/test/CodeGen/NVPTX/st-generic.ll b/llvm/test/CodeGen/NVPTX/st-generic.ll --- a/llvm/test/CodeGen/NVPTX/st-generic.ll +++ b/llvm/test/CodeGen/NVPTX/st-generic.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ;; i8 diff --git a/llvm/test/CodeGen/NVPTX/store-retval.ll b/llvm/test/CodeGen/NVPTX/store-retval.ll --- a/llvm/test/CodeGen/NVPTX/store-retval.ll +++ b/llvm/test/CodeGen/NVPTX/store-retval.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s --mtriple=nvptx-unknown-unknown | FileCheck %s -; RUN: %if ptxas %{ llc < %s --mtriple=nvptx-unknown-unknown | %ptxas-verify %} +; RUN: llc < %s --mtriple=nvptx64-unknown-unknown | FileCheck %s +; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64-unknown-unknown | %ptxas-verify %} ; ; This is IR generated with clang using -O3 optimization level ; and nvptx-unknown-unknown target from the following C code. diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll --- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" @@ -20,8 +20,8 @@ ; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] ; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] %ret = sitofp i32 %val to float -; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] -; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] +; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] +; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] store float %ret, ptr %red ret void } @@ -39,8 +39,8 @@ ; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] ; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] %ret = sitofp i32 %val to float -; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] -; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] +; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] +; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] store float %ret, ptr %red ret void } diff --git a/llvm/test/CodeGen/NVPTX/surf-read.ll b/llvm/test/CodeGen/NVPTX/surf-read.ll --- a/llvm/test/CodeGen/NVPTX/surf-read.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-nvcl" @@ -11,7 +11,7 @@ %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx) ; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] %ret = sitofp i32 %val to float -; CHECK: st.f32 [%r{{[0-9]+}}], %f[[REDF]] +; CHECK: st.f32 [%rd{{[0-9]+}}], %f[[REDF]] store float %ret, ptr %red ret void } diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll --- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/surf-write.ll b/llvm/test/CodeGen/NVPTX/surf-write.ll --- a/llvm/test/CodeGen/NVPTX/surf-write.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-nvcl" diff --git a/llvm/test/CodeGen/NVPTX/symbol-naming.ll b/llvm/test/CodeGen/NVPTX/symbol-naming.ll --- a/llvm/test/CodeGen/NVPTX/symbol-naming.ll +++ b/llvm/test/CodeGen/NVPTX/symbol-naming.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} ; Verify that the NVPTX target removes invalid symbol names prior to emitting diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" @@ -18,8 +18,8 @@ ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] -; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] +; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] store float %ret, ptr %red ret void } @@ -36,8 +36,8 @@ ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] -; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] +; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] store float %ret, ptr %red ret void } @@ -63,8 +63,8 @@ ; SM20: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]] ; SM30: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]] %ret2 = fadd float %ret, %texcall -; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RET2]] -; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RET2]] +; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]] +; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]] store float %ret2, ptr %red ret void } diff --git a/llvm/test/CodeGen/NVPTX/tex-read.ll b/llvm/test/CodeGen/NVPTX/tex-read.ll --- a/llvm/test/CodeGen/NVPTX/tex-read.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-nvcl" @@ -10,7 +10,7 @@ ; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]] +; CHECK: st.f32 [%rd{{[0-9]+}}], %f[[RED]] store float %ret, ptr %red ret void } diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll --- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll +++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/tuple-literal.ll b/llvm/test/CodeGen/NVPTX/tuple-literal.ll deleted file mode 100644 --- a/llvm/test/CodeGen/NVPTX/tuple-literal.ll +++ /dev/null @@ -1,5 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 %if ptxas %{ | %ptxas-verify %} - -define ptx_device void @test_function(ptr) { - ret void -} diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll --- a/llvm/test/CodeGen/NVPTX/vaargs.ll +++ b/llvm/test/CodeGen/NVPTX/vaargs.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK32 ; RUN: llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK64 -; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} ; CHECK: .address_size [[BITS:32|64]] diff --git a/llvm/test/CodeGen/NVPTX/vec-param-load.ll b/llvm/test/CodeGen/NVPTX/vec-param-load.ll --- a/llvm/test/CodeGen/NVPTX/vec-param-load.ll +++ b/llvm/test/CodeGen/NVPTX/vec-param-load.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/vec8.ll b/llvm/test/CodeGen/NVPTX/vec8.ll --- a/llvm/test/CodeGen/NVPTX/vec8.ll +++ b/llvm/test/CodeGen/NVPTX/vec8.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target triple = "nvptx-unknown-cuda" @@ -7,7 +7,7 @@ define void @foo(<8 x i8> %a, ptr %b) { ; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [foo_param_0] ; CHECK-DAG: ld.param.v4.u8 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [foo_param_0+4] -; CHECK-DAG: ld.param.u32 %[[B:r[0-9+]]], [foo_param_1] +; CHECK-DAG: ld.param.u64 %[[B:rd[0-9+]]], [foo_param_1] ; CHECK: add.s16 [[T:%rs[0-9+]]], [[E1]], [[E6]]; ; CHECK: st.u8 [%[[B]]], [[T]]; %t0 = extractelement <8 x i8> %a, i32 1 diff --git a/llvm/test/CodeGen/NVPTX/vector-args.ll b/llvm/test/CodeGen/NVPTX/vector-args.ll --- a/llvm/test/CodeGen/NVPTX/vector-args.ll +++ b/llvm/test/CodeGen/NVPTX/vector-args.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} define float @foo(<2 x float> %a) { ; CHECK: .func (.param .b32 func_retval0) foo diff --git a/llvm/test/CodeGen/NVPTX/vector-call.ll b/llvm/test/CodeGen/NVPTX/vector-call.ll --- a/llvm/test/CodeGen/NVPTX/vector-call.ll +++ b/llvm/test/CodeGen/NVPTX/vector-call.ll @@ -1,31 +1,31 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} - -target triple = "nvptx-unknown-cuda" - -declare void @bar(<4 x i32>) - -; CHECK-LABEL: .func foo( -; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [foo_param_0]; -; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; -; CHECK: call.uni -; CHECK: ret; -define void @foo(<4 x i32> %a) { - tail call void @bar(<4 x i32> %a) - ret void -} - -; CHECK-LABEL: .func foo3( -; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [foo3_param_0]; -; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [foo3_param_0+8]; -; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; -; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; -; CHECK: call.uni -; CHECK: ret; -declare void @bar3(<3 x i32>) -define void @foo3(<3 x i32> %a) { - tail call void @bar3(<3 x i32> %a) - ret void -} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} + +target triple = "nvptx-unknown-cuda" + +declare void @bar(<4 x i32>) + +; CHECK-LABEL: .func foo( +; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [foo_param_0]; +; CHECK: .param .align 16 .b8 param0[16]; +; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK: call.uni +; CHECK: ret; +define void @foo(<4 x i32> %a) { + tail call void @bar(<4 x i32> %a) + ret void +} + +; CHECK-LABEL: .func foo3( +; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [foo3_param_0]; +; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [foo3_param_0+8]; +; CHECK: .param .align 16 .b8 param0[16]; +; CHECK-DAG: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; +; CHECK: call.uni +; CHECK: ret; +declare void @bar3(<3 x i32>) +define void @foo3(<3 x i32> %a) { + tail call void @bar3(<3 x i32> %a) + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/vector-compare.ll b/llvm/test/CodeGen/NVPTX/vector-compare.ll --- a/llvm/test/CodeGen/NVPTX/vector-compare.ll +++ b/llvm/test/CodeGen/NVPTX/vector-compare.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify -m32 %} +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify -m32 %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; This test makes sure that the result of vector compares are properly diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll --- a/llvm/test/CodeGen/NVPTX/vector-loads.ll +++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; Even though general vector types are not supported in PTX, we can still ; optimize loads/stores with pseudo-vector instructions of the form: diff --git a/llvm/test/CodeGen/NVPTX/vector-select.ll b/llvm/test/CodeGen/NVPTX/vector-select.ll --- a/llvm/test/CodeGen/NVPTX/vector-select.ll +++ b/llvm/test/CodeGen/NVPTX/vector-select.ll @@ -1,16 +1,27 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 %if ptxas %{ | %ptxas-verify %} -; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 %if ptxas %{ | %ptxas-verify %} +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas %{llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; This test makes sure that vector selects are scalarized by the type legalizer. ; If not, type legalization will fail. +; CHECK-LABEL: .visible .func foo( define void @foo(ptr addrspace(1) %def_a, ptr addrspace(1) %def_b, ptr addrspace(1) %def_c) { entry: +; CHECK: ld.global.v2.u32 +; CHECK: ld.global.v2.u32 +; CHECK: ld.global.v2.u32 %tmp4 = load <2 x i32>, ptr addrspace(1) %def_a %tmp6 = load <2 x i32>, ptr addrspace(1) %def_c %tmp8 = load <2 x i32>, ptr addrspace(1) %def_b +; CHECK: setp.gt.s32 +; CHECK: setp.gt.s32 %0 = icmp sge <2 x i32> %tmp4, zeroinitializer +; CHECK: selp.b32 +; CHECK: selp.b32 %cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8 +; CHECK: st.global.v2.u32 store <2 x i32> %cond, ptr addrspace(1) %def_c ret void } diff --git a/llvm/test/CodeGen/NVPTX/vector-stores.ll b/llvm/test/CodeGen/NVPTX/vector-stores.ll --- a/llvm/test/CodeGen/NVPTX/vector-stores.ll +++ b/llvm/test/CodeGen/NVPTX/vector-stores.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: .visible .func foo1 ; CHECK: st.v2.f32 diff --git a/llvm/test/CodeGen/NVPTX/weak-global.ll b/llvm/test/CodeGen/NVPTX/weak-global.ll --- a/llvm/test/CodeGen/NVPTX/weak-global.ll +++ b/llvm/test/CodeGen/NVPTX/weak-global.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: .weak .global .align 4 .u32 g @g = common addrspace(1) global i32 zeroinitializer diff --git a/llvm/test/CodeGen/NVPTX/weak-linkage.ll b/llvm/test/CodeGen/NVPTX/weak-linkage.ll --- a/llvm/test/CodeGen/NVPTX/weak-linkage.ll +++ b/llvm/test/CodeGen/NVPTX/weak-linkage.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; CHECK: // .weak foo ; CHECK: .weak .func foo diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -305,6 +305,7 @@ (11, 7), (11, 8), (12, 0), + (12, 1), ] def version_int(ver):