diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -824,6 +824,8 @@ BUILTIN(__nvvm_isspacep_local, "bvC*", "nc") BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc") +BUILTIN(__nvvm_reflect, "icC*", "nc") + // Builtins to support WMMA instructions on sm_70 TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60)) TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60)) diff --git a/clang/test/CodeGenCUDA/nvvm-reflect.cu b/clang/test/CodeGenCUDA/nvvm-reflect.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/nvvm-reflect.cu @@ -0,0 +1,81 @@ +// REQUIRES: nvptx-registered-target + +// Checking to see that __nvvm_reflect resolves to the correct llvm.nvvm.reflect +// intrinsic +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -disable-llvm-passes -S -emit-llvm -x c++ %s -o - | FileCheck %s --check-prefix=NO_NVVM_REFLECT_PASS + +// Prepare bitcode file to link with +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \ +// RUN: -disable-llvm-passes -o %t.bc %s + +// Checking to see if the correct values are substituted for the nvvm_reflect +// call when llvm passes are enabled. +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_50 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_1 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_52 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_2 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_53 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_3 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_60 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_4 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_61 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_5 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_62 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_6 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_70 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_7 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_72 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_8 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_75 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_9 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_80 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_10 +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \ +// RUN: sm_86 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH_REFLECT_11 + +// Check to see that nvvm_reflect("__CUDA_FTZ") returns 1 or 0 based on value +// of -fdenormal-fp-math-f32 flag +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda \ +// RUN: -fdenormal-fp-math-f32=preserve-sign -S -o /dev/null %s -mllvm \ +// RUN: -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=FTZ_REFLECT +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda \ +// RUN: -fdenormal-fp-math-f32=ieee -S -o /dev/null %s -mllvm \ +// RUN: -print-after-all 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO_FTZ_REFLECT + +#include "Inputs/cuda.h" + +__device__ int foo_arch() { + // NO_NVVM_REFLECT_PASS: call i32 @llvm.nvvm.reflect + // ARCH_REFLECT_1: ret i32 500 + // ARCH_REFLECT_2: ret i32 520 + // ARCH_REFLECT_3: ret i32 530 + // ARCH_REFLECT_4: ret i32 600 + // ARCH_REFLECT_5: ret i32 610 + // ARCH_REFLECT_6: ret i32 620 + // ARCH_REFLECT_7: ret i32 700 + // ARCH_REFLECT_8: ret i32 720 + // ARCH_REFLECT_9: ret i32 750 + // ARCH_REFLECT_10: ret i32 800 + // ARCH_REFLECT_11: ret i32 860 + return __nvvm_reflect("__CUDA_ARCH"); +} + +__device__ int foo_ftz() { + // FTZ_REFLECT: ret i32 1 + // NO_FTZ_REFLECT: ret i32 0 + return __nvvm_reflect("__CUDA_FTZ"); +} + diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1578,7 +1578,8 @@ Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">; def int_nvvm_reflect : - Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.reflect">; + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">, + ClangBuiltin<"__nvvm_reflect">; // isspacep.{const, global, local, shared} def int_nvvm_isspacep_const diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll @@ -41,7 +41,7 @@ ret float %ret } -declare i32 @llvm.nvvm.reflect.p0i8(ptr) +declare i32 @llvm.nvvm.reflect(ptr) ; CHECK-LABEL: define i32 @intrinsic define i32 @intrinsic() { @@ -49,7 +49,7 @@ ; USE_FTZ_0: ret i32 0 ; USE_FTZ_1: ret i32 1 %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(ptr addrspace(4) @str) - %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(ptr %ptr) + %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr) ret i32 %reflect } diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -41,7 +41,7 @@ ret float %ret } -declare i32 @llvm.nvvm.reflect.p0i8(i8*) +declare i32 @llvm.nvvm.reflect(i8*) ; CHECK-LABEL: define i32 @intrinsic define i32 @intrinsic() { @@ -49,7 +49,7 @@ ; USE_FTZ_0: ret i32 0 ; USE_FTZ_1: ret i32 1 %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0)) - %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr) + %reflect = tail call i32 @llvm.nvvm.reflect(i8* %ptr) ret i32 %reflect }