Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -2023,6 +2023,13 @@ let Documentation = [NoEscapeDocs]; } +def MayBeUndef : InheritableAttr { + let Spellings = [Clang<"maybe_undef">]; + let Subjects = SubjectList<[ParmVar]>; + let Documentation = [MayBeUndefDocs]; + let SimpleHandler = 1; +} + def AssumeAligned : InheritableAttr { let Spellings = [GCC<"assume_aligned">]; let Subjects = SubjectList<[ObjCMethod, Function]>; Index: clang/include/clang/Basic/AttrDocs.td =================================================================== --- clang/include/clang/Basic/AttrDocs.td +++ clang/include/clang/Basic/AttrDocs.td @@ -257,6 +257,26 @@ }]; } +def MayBeUndefDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``maybe_undef`` attribute can be placed on function parameter. It indicates +that the parameter is allowed to use undef values. It informs the compiler +to insert a freeze LLVM IR instruction on the function parameter. + +In languages HIP, CUDA etc., some functions have multi-threaded semantics and +it is enough for only one or some threads to provide defined arguments. +Depending on semantics, undef arguments in some threads don't produce +undefined results in the function call. Since, these functions accept undefined +arguments, ``maybe_undef`` attribute can be placed. + +Sample usage: +.. code-block:: c + + void maybeundeffunc(int __attribute__((maybe_undef))param); + }]; +} + def CarriesDependencyDocs : Documentation { let Category = DocCatFunction; let Content = [{ Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -2046,6 +2046,25 @@ return false; } +/// Check if the argument of a function has maybe_undef attribute. +static bool IsArgumentMayBeUndef(const Decl *TargetDecl, unsigned ArgNo) { + if (!TargetDecl) + return false; + + bool ArgHasMayBeUndefAttr = false; + if (TargetDecl) { + if (const FunctionDecl *FD = dyn_cast(TargetDecl)) { + if (FD && (ArgNo < FD->getNumParams())) { + const ParmVarDecl *Param = FD->getParamDecl(ArgNo); + if (Param && Param->hasAttr()) { + ArgHasMayBeUndefAttr = true; + } + } + } + } + return ArgHasMayBeUndefAttr; +} + /// Construct the IR attribute list of a function or call. /// /// When adding an attribute, please consider where it should be handled: @@ -4816,6 +4835,8 @@ unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); + bool ArgHasMayBeUndefAttr = IsArgumentMayBeUndef(TargetDecl, ArgNo); + switch (ArgInfo.getKind()) { case ABIArgInfo::InAlloca: { assert(NumIRArgs == 0); @@ -4874,7 +4895,11 @@ // Make a temporary alloca to pass the argument. Address Addr = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); - IRCallArgs[FirstIRArg] = Addr.getPointer(); + + llvm::Value *Val = Addr.getPointer(); + if (ArgHasMayBeUndefAttr) + Val = Builder.CreateFreeze(Addr.getPointer()); + IRCallArgs[FirstIRArg] = Val; I->copyInto(*this, Addr); } else { @@ -4932,7 +4957,10 @@ // Create an aligned temporary, and copy to it. Address AI = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); - IRCallArgs[FirstIRArg] = AI.getPointer(); + llvm::Value *Val = AI.getPointer(); + if (ArgHasMayBeUndefAttr) + Val = Builder.CreateFreeze(AI.getPointer()); + IRCallArgs[FirstIRArg] = Val; // Emit lifetime markers for the temporary alloca. llvm::TypeSize ByvalTempElementSize = @@ -4951,9 +4979,13 @@ auto *T = llvm::PointerType::getWithSamePointeeType( cast(V->getType()), CGM.getDataLayout().getAllocaAddrSpace()); - IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast( + + llvm::Value *Val = getTargetHooks().performAddrSpaceCast( *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true); + if (ArgHasMayBeUndefAttr) + Val = Builder.CreateFreeze(Val); + IRCallArgs[FirstIRArg] = Val; } } break; @@ -5007,6 +5039,8 @@ V->getType() != IRFuncTy->getParamType(FirstIRArg)) V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + if (ArgHasMayBeUndefAttr) + V = Builder.CreateFreeze(V); IRCallArgs[FirstIRArg] = V; break; } @@ -5051,6 +5085,8 @@ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Address EltPtr = Builder.CreateStructGEP(Src, i); llvm::Value *LI = Builder.CreateLoad(EltPtr); + if (ArgHasMayBeUndefAttr) + LI = Builder.CreateFreeze(LI); IRCallArgs[FirstIRArg + i] = LI; } } else { @@ -5067,6 +5103,9 @@ if (ATy != nullptr && isa(I->Ty.getCanonicalType())) Load = EmitCMSEClearRecord(Load, ATy, I->Ty); } + + if (ArgHasMayBeUndefAttr) + Load = Builder.CreateFreeze(Load); IRCallArgs[FirstIRArg] = Load; } @@ -5112,6 +5151,8 @@ if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = Builder.CreateLoad(eltAddr); + if (ArgHasMayBeUndefAttr) + elt = Builder.CreateFreeze(elt); IRCallArgs[IRArgPos++] = elt; } assert(IRArgPos == FirstIRArg + NumIRArgs); Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -8634,6 +8634,9 @@ case ParsedAttr::AT_NoEscape: handleNoEscapeAttr(S, D, AL); break; + case ParsedAttr::AT_MayBeUndef: + handleSimpleAttribute(S, D, AL); + break; case ParsedAttr::AT_AssumeAligned: handleAssumeAlignedAttr(S, D, AL); break; Index: clang/test/CodeGen/attr-maybeundef.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-maybeundef.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s + +#define __maybe_undef __attribute__((maybe_undef)) + +// CHECK: define dso_local void @t1(i32 noundef %param1, i32 noundef %param2, float noundef %param3) #[[attr1:[0-9]+]] +void t1(int param1, int __maybe_undef param2, float param3) {} + +// CHECK: define dso_local void @t2(i32 noundef %param1, i32 noundef %param2, float noundef %param3) +// CHECK: [[TMP1:%.*]] = freeze i32 [[TMP2:%.*]] +// CHECK: call void @t1(i32 noundef %0, i32 noundef [[TMP1:%.*]], float noundef %2) +// expected-error {{'maybe_undef' attribute only applies to parameters [-Wignored-attributes]}} +void __maybe_undef t2(int param1, int param2, float param3) { + t1(param1, param2, param3); +} Index: clang/test/CodeGenHIP/maybe_undef-attr-verify.hip =================================================================== --- /dev/null +++ clang/test/CodeGenHIP/maybe_undef-attr-verify.hip @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \ +// RUN: -o - | FileCheck %s + +#define __global__ __attribute__((global)) +#define __device__ __attribute__((device)) +#define __maybe_undef __attribute__((maybe_undef)) +#define WARP_SIZE 64 + +static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE; + +__device__ static inline unsigned int __lane_id() { + return __builtin_amdgcn_mbcnt_hi( + -1, __builtin_amdgcn_mbcnt_lo(-1, 0)); +} + +__device__ +inline +int __shfl_sync(int __maybe_undef var, int src_lane, int width = warpSize) { + int self = __lane_id(); + int index = src_lane + (self & ~(width-1)); + return __builtin_amdgcn_ds_bpermute(index<<2, var); +} + +__global__ void +shufflekernel() +{ + int t; + int res; + res = __shfl_sync(t, WARP_SIZE, 0); +} + +// CHECK: define dso_local amdgpu_kernel void @_Z13shufflekernelv() +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast i32 addrspace(5)* [[TMP1:%.*]] to i32* +// CHECK-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(5)* [[TMP2:%.*]] to i32* +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3:%.*]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = freeze i32 [[TMP5:%.*]] +// CHECK-NEXT: %call = call noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP6:%.*]], i32 noundef 64, i32 noundef 0) #4 +// CHECK-NEXT: store i32 %call, i32* [[TMP4:%.*]], align 4 +// CHECK-NEXT: ret void + +// CHECK: define linkonce_odr noundef i32 @_Z11__shfl_synciii(i32 noundef %var, i32 noundef %src_lane, i32 noundef %width) Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test =================================================================== --- clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -83,6 +83,7 @@ // CHECK-NEXT: Lockable (SubjectMatchRule_record) // CHECK-NEXT: MIGServerRoutine (SubjectMatchRule_function, SubjectMatchRule_objc_method, SubjectMatchRule_block) // CHECK-NEXT: MSStruct (SubjectMatchRule_record) +// CHECK-NEXT: MayBeUndef (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: MicroMips (SubjectMatchRule_function) // CHECK-NEXT: MinSize (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: MinVectorWidth (SubjectMatchRule_function)