diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -115,11 +115,11 @@ class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_20", [SM20]>; -def : Proc<"sm_21", [SM21]>; +def : Proc<"sm_20", [SM20, PTX32]>; +def : Proc<"sm_21", [SM21, PTX32]>; def : Proc<"sm_30", [SM30]>; def : Proc<"sm_32", [SM32, PTX40]>; -def : Proc<"sm_35", [SM35]>; +def : Proc<"sm_35", [SM35, PTX32]>; def : Proc<"sm_37", [SM37, PTX41]>; def : Proc<"sm_50", [SM50, PTX40]>; def : Proc<"sm_52", [SM52, PTX41]>; diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -33,13 +33,13 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { // Provide the default CPU if we don't have one. - TargetName = std::string(CPU.empty() ? "sm_20" : CPU); + TargetName = std::string(CPU.empty() ? "sm_30" : CPU); ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS); - // Set default to PTX 3.2 (CUDA 5.5) + // Set default to PTX 6.0 (CUDA 9.0) if (PTXVersion == 0) { - PTXVersion = 32; + PTXVersion = 60; } return *this; diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll @@ -1,7 +1,7 @@ ; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type. ; Verify that __nvvm_reflect() is replaced with an appropriate value. 
; -; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \ +; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM20 ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM35 diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll @@ -1,6 +1,6 @@ ; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value ; -; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \ +; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM20 ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM35 diff --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll --- a/llvm/test/CodeGen/NVPTX/sm-version.ll +++ b/llvm/test/CodeGen/NVPTX/sm-version.ll @@ -32,7 +32,9 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefix=SM80 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_86 | FileCheck %s --check-prefix=SM86 -; SM30: .version 3.2 +; SM20: .version 3.2 +; SM21: .version 3.2 +; SM30: .version 6.0 ; SM32: .version 4.0 ; SM35: .version 3.2 ; SM37: .version 4.1 diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py --- a/llvm/test/CodeGen/NVPTX/surf-tex.py +++ b/llvm/test/CodeGen/NVPTX/surf-tex.py @@ -1,6 +1,6 @@ # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll -# RUN: llc %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll -# RUN: %if ptxas %{ llc %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %} +# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll +# RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %} # We 
only need to run this second time for texture tests, because # there is a difference between unified and non-unified intrinsics. diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll --- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll +++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %} -; CHECK: .target sm_20, debug +; CHECK: .target sm_30, debug ; CHECK: .visible .func use_dbg_declare() ; CHECK: .local .align 8 .b8 __local_depot0[8];