// Patch summary (free-form leading text; it precedes the first `diff --git` header).
//
// Purpose: expose a set of NVPTX cluster-related builtins in clang, each
// declared with TARGET_BUILTIN and gated on AND(SM_90, PTX78), and lower every
// one of them to the identically named llvm.nvvm intrinsic.
//
// Files touched:
//   clang/include/clang/Basic/BuiltinsNVPTX.def
//     - __nvvm_read_ptx_sreg_{clusterid,nclusterid,cluster_ctaid,cluster_nctaid}_{x,y,z,w}
//       and __nvvm_read_ptx_sreg_cluster_{ctarank,nctarank}: type "i", attributes "nc".
//     - __nvvm_is_explicit_cluster: type "b", attributes "nc".
//     - __nvvm_isspacep_shared_cluster: type "bvC*", attributes "nc", added
//       right after the existing __nvvm_isspacep_shared entry.
//     - __nvvm_mapa ("v*v*i"), __nvvm_mapa_shared_cluster ("v*3v*3i"),
//       __nvvm_getctarank ("iv*"), __nvvm_getctarank_shared_cluster ("iv*3"):
//       empty attribute string.
//   clang/lib/CodeGen/CGBuiltin.cpp
//     - One new `case NVPTX::BI__nvvm_...` per builtin, inserted ahead of the
//       switch's `default: return nullptr;`. Each case returns
//       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::nvvm_...), ...).
//       The register reads and __nvvm_is_explicit_cluster pass no operands;
//       the isspacep/getctarank variants pass EmitScalarExpr(E->getArg(0));
//       the two mapa variants pass arguments 0 and 1.
//   clang/test/CodeGenCUDA/builtins-sm90.cu (new file)
//     - RUN line: %clang_cc1 for triple nvptx64-nvidia-cuda, target feature
//       +ptx78, target CPU sm_90, -emit-llvm -fcuda-is-device, piped to FileCheck.
//     - A single kernel calls every new builtin once; the CHECK line ahead of
//       each call expects a call to the corresponding @llvm.nvvm intrinsic
//       (i32 for the register reads and getctarank, i1 for the two predicates,
//       ptr / ptr addrspace(3) for the mapa variants).
//
// NOTE(review): in this copy the newlines inside each file section appear to
// have been collapsed into spaces (the hunk headers still declare 31-, 7-,
// 11-, 83- and 61-line results). Presumably it has to be re-split into one
// diff line per source line before `patch` or `git apply` can consume it --
// confirm against the original review.
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -99,6 +99,31 @@ BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc") BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc") +TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78)) + +TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78)) + +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78)) + +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78)) + +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78)) + +TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78)) + BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc") BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc") 
BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc") @@ -865,6 +890,7 @@ BUILTIN(__nvvm_isspacep_global, "bvC*", "nc") BUILTIN(__nvvm_isspacep_local, "bvC*", "nc") BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc") +TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78)) // Builtins to support WMMA instructions on sm_70 TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60)) @@ -988,6 +1014,11 @@ TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70)) TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78)) +TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) + #undef BUILTIN #undef TARGET_BUILTIN #pragma pop_macro("AND") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18885,6 +18885,83 @@ return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16, Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E, 16); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y: + return 
Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank)); + case NVPTX::BI__nvvm_is_explicit_cluster: + return Builder.CreateCall( + 
CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster)); + case NVPTX::BI__nvvm_isspacep_shared_cluster: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster), + EmitScalarExpr(E->getArg(0))); + case NVPTX::BI__nvvm_mapa: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_mapa), + {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); + case NVPTX::BI__nvvm_mapa_shared_cluster: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster), + {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); + case NVPTX::BI__nvvm_getctarank: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_getctarank), + EmitScalarExpr(E->getArg(0))); + case NVPTX::BI__nvvm_getctarank_shared_cluster: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster), + EmitScalarExpr(E->getArg(0))); default: return nullptr; } diff --git a/clang/test/CodeGenCUDA/builtins-sm90.cu b/clang/test/CodeGenCUDA/builtins-sm90.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/builtins-sm90.cu @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx78" "-target-cpu" "sm_90" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +// CHECK: define{{.*}} void @_Z6kernelPlPvj( +__attribute__((global)) void kernel(long *out, void *ptr, unsigned u) { + int i = 0; + // CHECK: call i1 @llvm.nvvm.isspacep.shared.cluster + out[i++] = __nvvm_isspacep_shared_cluster(ptr); + + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x() + out[i++] = __nvvm_read_ptx_sreg_clusterid_x(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y() + out[i++] = __nvvm_read_ptx_sreg_clusterid_y(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z() + out[i++] = __nvvm_read_ptx_sreg_clusterid_z(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w() + out[i++] = __nvvm_read_ptx_sreg_clusterid_w(); + // CHECK: call i32 
@llvm.nvvm.read.ptx.sreg.nclusterid.x() + out[i++] = __nvvm_read_ptx_sreg_nclusterid_x(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y() + out[i++] = __nvvm_read_ptx_sreg_nclusterid_y(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z() + out[i++] = __nvvm_read_ptx_sreg_nclusterid_z(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w() + out[i++] = __nvvm_read_ptx_sreg_nclusterid_w(); + + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x() + out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_x(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y() + out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_y(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z() + out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_z(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.w() + out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_w(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x() + out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_x(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y() + out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_y(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z() + out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_z(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.w() + out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_w(); + + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank() + out[i++] = __nvvm_read_ptx_sreg_cluster_ctarank(); + // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank() + out[i++] = __nvvm_read_ptx_sreg_cluster_nctarank(); + // CHECK: call i1 @llvm.nvvm.is_explicit_cluster() + out[i++] = __nvvm_is_explicit_cluster(); + + auto * sptr = (__attribute__((address_space(3))) void *)ptr; + // CHECK: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}}) + out[i++] = (long) __nvvm_mapa(ptr, u); + // CHECK: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) + out[i++] = (long) 
__nvvm_mapa_shared_cluster(sptr, u); + // CHECK: call i32 @llvm.nvvm.getctarank(ptr {{.*}}) + out[i++] = __nvvm_getctarank(ptr); + // CHECK: call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) {{.*}}) + out[i++] = __nvvm_getctarank_shared_cluster(sptr); + + // CHECK: ret void +}