Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -16753,25 +16753,14 @@ const CallExpr *E = nullptr) { auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); auto *Call = CGF.Builder.CreateCall(F); - Call->addRetAttr( - Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); - Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4))); if (!E) return Call; - QualType BuiltinRetType = E->getType(); - auto *RetTy = cast(CGF.ConvertType(BuiltinRetType)); - if (RetTy == Call->getType()) - return Call; - return CGF.Builder.CreateAddrSpaceCast(Call, RetTy); + return CGF.Builder.CreateAddrSpaceCast(Call, CGF.ConvertType(E->getType())); } Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr); - auto *Call = CGF.Builder.CreateCall(F); - Call->addRetAttr( - Attribute::getWithDereferenceableBytes(Call->getContext(), 256)); - Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8))); - return Call; + return CGF.Builder.CreateCall(F); } // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. Index: clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,16 +1,16 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ -// RUN: | FileCheck -check-prefix=PRECOV5 %s +// RUN: | FileCheck -check-prefixes=PRECOV5,CHECK %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ -// RUN: | FileCheck -check-prefix=COV5 %s +// RUN: | FileCheck -check-prefixes=COV5,CHECK %s #include "Inputs/cuda.h" // PRECOV5-LABEL: test_get_workgroup_size -// PRECOV5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// PRECOV5: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 // PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef // PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 @@ -19,7 +19,7 @@ // PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef // COV5-LABEL: test_get_workgroup_size -// COV5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// COV5: call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() // COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 // COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef // COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 @@ -36,4 +36,7 @@ } } +// COV4: declare align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// COV5: declare align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} Index: clang/test/CodeGenCUDA/builtins-amdgcn.cu =================================================================== --- clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -21,7 +21,7 @@ // CHECK-NEXT: store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8 // CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 // CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr // CHECK-NEXT: store ptr [[TMP2]], ptr [[DISPATCH_PTR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8 @@ -154,7 +154,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr // CHECK-NEXT: store ptr [[TMP1]], ptr [[X_ASCAST]], align 8 // CHECK-NEXT: ret void Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -516,12 +516,15 @@ } // CHECK-LABEL: @test_dispatch_ptr -// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() void test_dispatch_ptr(__constant unsigned char ** out) { *out = __builtin_amdgcn_dispatch_ptr(); } +// CHECK: declare align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + + // CHECK-LABEL: @test_queue_ptr // CHECK: call ptr addrspace(4) @llvm.amdgcn.queue.ptr() void test_queue_ptr(__constant unsigned char ** out) @@ -543,6 +546,9 @@ *out = __builtin_amdgcn_implicitarg_ptr(); } +// CHECK: declare align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + + // CHECK-LABEL: @test_get_group_id( // CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x() // CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y() @@ -583,7 +589,7 @@ } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 4 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 6 @@ -601,7 +607,7 @@ } // CHECK-LABEL: @test_get_grid_size( -// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 12 // CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !invariant.load // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 16 Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -94,6 +94,11 @@ int Align = align; } +class Dereferenceable : IntrinsicProperty { + int ArgNo = idx.Value; + int Bytes = bytes; +} + // Returned - The specified argument is always the return value of the // intrinsic. class Returned : IntrinsicProperty { Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -141,8 +141,10 @@ <"__builtin_amdgcn_workgroup_id">; def int_amdgcn_dispatch_ptr : + ClangBuiltin<"__builtin_amdgcn_dispatch_ptr">, DefaultAttrsIntrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable]>; + [Align, Dereferenceable, IntrNoMem, + IntrSpeculatable]>; def int_amdgcn_queue_ptr : ClangBuiltin<"__builtin_amdgcn_queue_ptr">, @@ -157,7 +159,8 @@ def int_amdgcn_implicitarg_ptr : ClangBuiltin<"__builtin_amdgcn_implicitarg_ptr">, DefaultAttrsIntrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable]>; + [Align, Dereferenceable, + IntrNoMem, IntrSpeculatable]>; def int_amdgcn_groupstaticsize : ClangBuiltin<"__builtin_amdgcn_groupstaticsize">, Index: llvm/utils/TableGen/CodeGenIntrinsics.h =================================================================== --- llvm/utils/TableGen/CodeGenIntrinsics.h +++ llvm/utils/TableGen/CodeGenIntrinsics.h @@ -119,7 +119,8 @@ WriteOnly, ReadNone, ImmArg, - Alignment + Alignment, + Dereferenceable }; struct ArgAttribute { Index: llvm/utils/TableGen/CodeGenTarget.cpp =================================================================== --- llvm/utils/TableGen/CodeGenTarget.cpp +++ llvm/utils/TableGen/CodeGenTarget.cpp @@ -923,6 +923,10 @@ unsigned ArgNo = R->getValueAsInt("ArgNo"); uint64_t Align = R->getValueAsInt("Align"); addArgAttribute(ArgNo, Alignment, Align); + } else if (R->isSubClassOf("Dereferenceable")) { + unsigned ArgNo = R->getValueAsInt("ArgNo"); + uint64_t Bytes = R->getValueAsInt("Bytes"); + addArgAttribute(ArgNo, Dereferenceable, Bytes); } else llvm_unreachable("Unknown property!"); } Index: llvm/utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- llvm/utils/TableGen/IntrinsicEmitter.cpp +++ llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -726,6 +726,10 @@ OS << " Attribute::get(C, Attribute::Alignment, " << Attr.Value << "),\n"; break; + case CodeGenIntrinsic::Dereferenceable: + OS << " Attribute::get(C, Attribute::Dereferenceable, " + << Attr.Value << "),\n"; + break; } } OS << " });\n";