diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2023,6 +2023,13 @@ let Documentation = [NoEscapeDocs]; } +def MaybeUndef : InheritableAttr { + let Spellings = [Clang<"maybe_undef">]; + let Subjects = SubjectList<[ParmVar]>; + let Documentation = [MaybeUndefDocs]; + let SimpleHandler = 1; +} + def AssumeAligned : InheritableAttr { let Spellings = [GCC<"assume_aligned">]; let Subjects = SubjectList<[ObjCMethod, Function]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -257,6 +257,28 @@ }]; } +def MaybeUndefDocs : Documentation { + let Category = DocCatVariable; + let Content = [{ +The ``maybe_undef`` attribute can be placed on a function parameter. It indicates +that the parameter is allowed to use undef values. It informs the compiler +to insert a freeze LLVM IR instruction on the function parameter. +Please note that this is an attribute that is used as an internal +implementation detail and not intended to be used by external users. + +In languages HIP, CUDA etc., some functions have multi-threaded semantics and +it is enough for only one or some threads to provide defined arguments. +Depending on semantics, undef arguments in some threads don't produce +undefined results in the function call. Since, these functions accept undefined +arguments, ``maybe_undef`` attribute can be placed. + +Sample usage: +.. code-block:: c + + void maybeundeffunc(int __attribute__((maybe_undef))param); + }]; +} + def CarriesDependencyDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2046,6 +2046,27 @@ return false; } +/// Check if the argument of a function has maybe_undef attribute. +static bool IsArgumentMaybeUndef(const Decl *TargetDecl, + unsigned NumRequiredArgs, unsigned ArgNo) { + const auto *FD = dyn_cast_or_null(TargetDecl); + if (!FD) + return false; + + // Assume variadic arguments do not have maybe_undef attribute. + if (ArgNo >= NumRequiredArgs) + return false; + + // Check if argument has maybe_undef attribute. + if (ArgNo < FD->getNumParams()) { + const ParmVarDecl *Param = FD->getParamDecl(ArgNo); + if (Param && Param->hasAttr()) + return true; + } + + return false; +} + /// Construct the IR attribute list of a function or call. /// /// When adding an attribute, please consider where it should be handled: @@ -4821,6 +4842,9 @@ unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); + bool ArgHasMaybeUndefAttr = + IsArgumentMaybeUndef(TargetDecl, CallInfo.getNumRequiredArgs(), ArgNo); + switch (ArgInfo.getKind()) { case ABIArgInfo::InAlloca: { assert(NumIRArgs == 0); @@ -4879,7 +4903,11 @@ // Make a temporary alloca to pass the argument. Address Addr = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); - IRCallArgs[FirstIRArg] = Addr.getPointer(); + + llvm::Value *Val = Addr.getPointer(); + if (ArgHasMaybeUndefAttr) + Val = Builder.CreateFreeze(Addr.getPointer()); + IRCallArgs[FirstIRArg] = Val; I->copyInto(*this, Addr); } else { @@ -4937,7 +4965,10 @@ // Create an aligned temporary, and copy to it. Address AI = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); - IRCallArgs[FirstIRArg] = AI.getPointer(); + llvm::Value *Val = AI.getPointer(); + if (ArgHasMaybeUndefAttr) + Val = Builder.CreateFreeze(AI.getPointer()); + IRCallArgs[FirstIRArg] = Val; // Emit lifetime markers for the temporary alloca. llvm::TypeSize ByvalTempElementSize = @@ -4956,9 +4987,13 @@ auto *T = llvm::PointerType::getWithSamePointeeType( cast(V->getType()), CGM.getDataLayout().getAllocaAddrSpace()); - IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast( + + llvm::Value *Val = getTargetHooks().performAddrSpaceCast( *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true); + if (ArgHasMaybeUndefAttr) + Val = Builder.CreateFreeze(Val); + IRCallArgs[FirstIRArg] = Val; } } break; @@ -5012,6 +5047,8 @@ V->getType() != IRFuncTy->getParamType(FirstIRArg)) V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + if (ArgHasMaybeUndefAttr) + V = Builder.CreateFreeze(V); IRCallArgs[FirstIRArg] = V; break; } @@ -5056,6 +5093,8 @@ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Address EltPtr = Builder.CreateStructGEP(Src, i); llvm::Value *LI = Builder.CreateLoad(EltPtr); + if (ArgHasMaybeUndefAttr) + LI = Builder.CreateFreeze(LI); IRCallArgs[FirstIRArg + i] = LI; } } else { @@ -5072,6 +5111,9 @@ if (ATy != nullptr && isa(I->Ty.getCanonicalType())) Load = EmitCMSEClearRecord(Load, ATy, I->Ty); } + + if (ArgHasMaybeUndefAttr) + Load = Builder.CreateFreeze(Load); IRCallArgs[FirstIRArg] = Load; } @@ -5117,6 +5159,8 @@ if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = Builder.CreateLoad(eltAddr); + if (ArgHasMaybeUndefAttr) + elt = Builder.CreateFreeze(elt); IRCallArgs[IRArgPos++] = elt; } assert(IRArgPos == FirstIRArg + NumIRArgs); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8634,6 +8634,9 @@ case ParsedAttr::AT_NoEscape: handleNoEscapeAttr(S, D, AL); break; + case ParsedAttr::AT_MaybeUndef: + handleSimpleAttribute(S, D, AL); + break; case ParsedAttr::AT_AssumeAligned: handleAssumeAlignedAttr(S, D, AL); break; diff --git a/clang/test/CodeGen/attr-maybeundef-template.cpp b/clang/test/CodeGen/attr-maybeundef-template.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-maybeundef-template.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define{{.*}} void @{{.*}}test4{{.*}}(float +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4 +// CHECK-NEXT: store float [[TMP1:%.*]], float* [[TMP2:%.*]], align 4 +// CHECK-NEXT: ret void + +// CHECK-LABEL: define{{.*}} void @{{.*}}test4{{.*}}(i32 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4 +// CHECK-NEXT: ret void + +// CHECK-LABEL: define{{.*}} void @{{.*}}test{{.*}}( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = freeze i32 [[TMP3:%.*]] +// CHECK-NEXT: call void @{{.*}}test4{{.*}}(i32 noundef [[TMP4:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2:%.*]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = freeze float [[TMP5:%.*]] +// CHECK-NEXT: call void @{{.*}}test4{{.*}}(float noundef [[TMP6:%.*]]) +// CHECK-NEXT: ret void + +template +void test4(T __attribute__((maybe_undef)) arg) { + return; +} + +template +void test4(float arg); + +template +void test4(int arg); + +void test() { + int Var1; + float Var2; + test4(Var1); + test4(Var2); +} diff --git a/clang/test/CodeGen/attr-maybeundef.c b/clang/test/CodeGen/attr-maybeundef.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-maybeundef.c @@ -0,0 +1,109 @@ +// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s + +#define __maybe_undef __attribute__((maybe_undef)) + +// CHECK: define{{.*}} void @t1(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP4:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP5:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP6:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP4:%.*]], align 4 +// CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP5:%.*]], align 4 +// CHECK-NEXT: store i32 [[TMP3:%.*]], i32* [[TMP6:%.*]], align 4 +// CHECK-NEXT: ret void + +// CHECK: define{{.*}} void @t2(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP4:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP5:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP6:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP4:%.*]], align 4 +// CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP5:%.*]], align 4 +// CHECK-NEXT: store i32 [[TMP3:%.*]], i32* [[TMP6:%.*]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP4:%.*]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP5:%.*]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6:%.*]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = freeze i32 [[TMP8:%.*]] +// CHECK-NEXT: call void @t1(i32 noundef [[TMP7:%.*]], i32 noundef [[TMP10:%.*]], i32 noundef [[TMP9:%.*]]) +// CHECK-NEXT: ret void + +void t1(int param1, int __maybe_undef param2, int param3) {} + +void t2(int param1, int param2, int param3) { + t1(param1, param2, param3); +} + +// CHECK: define{{.*}} void @TestVariadicFunction(i32 noundef [[TMP0:%.*]], ...) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP0:%.*]], i32* [[TMP1:%.*]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2:%.*]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2:%.*]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = freeze i32 [[TMP2:%.*]] +// CHECK-NEXT: call void (i32, ...) @VariadicFunction(i32 noundef [[TMP6:%.*]], i32 noundef [[TMP4:%.*]], i32 noundef [[TMP5:%.*]]) +// CHECK-NEXT: ret void + +// CHECK: declare{{.*}} void @VariadicFunction(i32 noundef, ...) + +void VariadicFunction(int __maybe_undef x, ...); +void TestVariadicFunction(int x, ...) { + int Var; + return VariadicFunction(x, Var, Var); +} + +// CHECK: define{{.*}} void @other() +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +// CHECK-NEXT: call void @func(i32 noundef [[TMP2:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = freeze i32 [[TMP3:%.*]] +// CHECK-NEXT: call void @func1(i32 noundef [[TMP4:%.*]]) +// CHECK-NEXT: ret void + +// CHECK: define{{.*}} void @func(i32 noundef [[TMP1:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4 +// CHECK-NEXT: ret void + +// CHECK: define{{.*}} void @func1(i32 noundef [[TMP1:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4 +// CHECK-NEXT: ret void + +void func(int param); +void func1(int __maybe_undef param); + +void other() { + int Var; + func(Var); + func1(Var); +} + +void func(__maybe_undef int param) {} +void func1(int param) {} + +// CHECK: define{{.*}} void @foo(i32 noundef [[TMP1:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4 +// CHECK-NEXT: ret void + +// CHECK: define{{.*}} void @bar() +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +// CHECK-NEXT: call void @foo(i32 noundef [[TMP2:%.*]]) +// CHECK-NEXT: ret void + +void foo(__maybe_undef int param); +void foo(int param) {} + +void bar() { + int Var; + foo(Var); +} diff --git a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \ +// RUN: -o - | FileCheck %s + +// CHECK: define dso_local amdgpu_kernel void @_Z13shufflekernelv() +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast i32 addrspace(5)* [[TMP1:%.*]] to i32* +// CHECK-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(5)* [[TMP2:%.*]] to i32* +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3:%.*]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = freeze i32 [[TMP5:%.*]] +// CHECK-NEXT: %call = call noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP6:%.*]], i32 noundef 64, i32 noundef 0) #4 +// CHECK-NEXT: store i32 %call, i32* [[TMP4:%.*]], align 4 +// CHECK-NEXT: ret void + +// CHECK: define linkonce_odr noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) + +#define __global__ __attribute__((global)) +#define __device__ __attribute__((device)) +#define __maybe_undef __attribute__((maybe_undef)) +#define WARP_SIZE 64 + +static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE; + +__device__ static inline unsigned int __lane_id() { + return __builtin_amdgcn_mbcnt_hi( + -1, __builtin_amdgcn_mbcnt_lo(-1, 0)); +} + +__device__ +inline +int __shfl_sync(int __maybe_undef var, int src_lane, int width = warpSize) { + int self = __lane_id(); + int index = src_lane + (self & ~(width-1)); + return __builtin_amdgcn_ds_bpermute(index<<2, var); +} + +__global__ void +shufflekernel() +{ + int t; + int res; + res = __shfl_sync(t, WARP_SIZE, 0); +} diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -83,6 +83,7 @@ // CHECK-NEXT: Lockable (SubjectMatchRule_record) // CHECK-NEXT: MIGServerRoutine (SubjectMatchRule_function, SubjectMatchRule_objc_method, SubjectMatchRule_block) // CHECK-NEXT: MSStruct (SubjectMatchRule_record) +// CHECK-NEXT: MaybeUndef (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: MicroMips (SubjectMatchRule_function) // CHECK-NEXT: MinSize (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: MinVectorWidth (SubjectMatchRule_function) diff --git a/clang/test/Sema/attr-maybeundef.c b/clang/test/Sema/attr-maybeundef.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/attr-maybeundef.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Decl annotations. +void f(int __attribute__((maybe_undef)) *a); +void (*fp)(int __attribute__((maybe_undef)) handle); +__attribute__((maybe_undef)) int i(); // expected-warning {{'maybe_undef' attribute only applies to parameters}} +int __attribute__((maybe_undef)) a; // expected-warning {{'maybe_undef' attribute only applies to parameters}} +int (* __attribute__((maybe_undef)) fpt)(char *); // expected-warning {{'maybe_undef' attribute only applies to parameters}} +void h(int *a __attribute__((maybe_undef("RandomString")))); // expected-error {{'maybe_undef' attribute takes no arguments}} + +// Type annotations. +int __attribute__((maybe_undef)) ta; // expected-warning {{'maybe_undef' attribute only applies to parameters}} + +// Typedefs. +typedef int callback(char *) __attribute__((maybe_undef)); // expected-warning {{'maybe_undef' attribute only applies to parameters}}