Index: clang/include/clang/CodeGen/CGFunctionInfo.h =================================================================== --- clang/include/clang/CodeGen/CGFunctionInfo.h +++ clang/include/clang/CodeGen/CGFunctionInfo.h @@ -44,10 +44,23 @@ /// but also emit a zero/sign extension attribute. Extend, - /// Indirect - Pass the argument indirectly via a hidden pointer - /// with the specified alignment (0 indicates default alignment). + /// Indirect - Pass the argument indirectly via a hidden pointer with the + /// specified alignment (0 indicates default alignment) and address space. Indirect, + /// IndirectAliased - Similar to Indirect, but the pointer may be to an + /// object that is otherwise referenced. The object is known to not be + /// modified through any other references for the duration of the call, and + /// the callee must not itself modify the object. Because C allows + /// parameter variables to be modified and guarantees that they have unique + /// addresses, the callee must defensively copy the object into a local + /// variable if it might be modified or its address might be compared. + /// Since those are uncommon, in principle this convention allows programs + /// to avoid copies in more situations. However, it may introduce *extra* + /// copies if the callee fails to prove that a copy is unnecessary and the + /// caller naturally produces an unaliased object for the argument. + IndirectAliased, + /// Ignore - Ignore the argument (treat as void). Useful for void and /// empty structs. Ignore, @@ -86,6 +99,7 @@ unsigned AllocaFieldIndex; // isInAlloca() }; Kind TheKind; + unsigned IndirectAddrSpace : 24; // isIndirect() bool PaddingInReg : 1; bool InAllocaSRet : 1; // isInAlloca() bool InAllocaIndirect : 1;// isInAlloca() @@ -97,7 +111,8 @@ bool SignExt : 1; // isExtend() bool canHavePaddingType() const { - return isDirect() || isExtend() || isIndirect() || isExpand(); + return isDirect() || isExtend() || isIndirect() || isIndirectAliased() || + isExpand(); } void setPaddingType(llvm::Type *T) { assert(canHavePaddingType()); @@ -112,9 +127,10 @@ public: ABIArgInfo(Kind K = Direct) : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K), - PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false), - IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false), - InReg(false), CanBeFlattened(false), SignExt(false) {} + IndirectAddrSpace(0), PaddingInReg(false), InAllocaSRet(false), + InAllocaIndirect(false), IndirectByVal(false), IndirectRealign(false), + SRetAfterThis(false), InReg(false), CanBeFlattened(false), + SignExt(false) {} static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0, llvm::Type *Padding = nullptr, @@ -180,6 +196,19 @@ AI.setPaddingType(Padding); return AI; } + + /// Pass this in memory using the IR byref attribute. + static ABIArgInfo getIndirectAliased(CharUnits Alignment, unsigned AddrSpace, + bool Realign = false, + llvm::Type *Padding = nullptr) { + auto AI = ABIArgInfo(IndirectAliased); + AI.setIndirectAlign(Alignment); + AI.setIndirectRealign(Realign); + AI.setPaddingType(Padding); + AI.setIndirectAddrSpace(AddrSpace); + return AI; + } + static ABIArgInfo getIndirectInReg(CharUnits Alignment, bool ByVal = true, bool Realign = false) { auto AI = getIndirect(Alignment, ByVal, Realign); @@ -259,6 +288,7 @@ bool isExtend() const { return TheKind == Extend; } bool isIgnore() const { return TheKind == Ignore; } bool isIndirect() const { return TheKind == Indirect; } + bool isIndirectAliased() const { return TheKind == IndirectAliased; } bool isExpand() const { return TheKind == Expand; } bool isCoerceAndExpand() const { return TheKind == CoerceAndExpand; } @@ -338,11 +368,11 @@ // Indirect accessors CharUnits getIndirectAlign() const { - assert(isIndirect() && "Invalid kind!"); + assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); return CharUnits::fromQuantity(IndirectAlign); } void setIndirectAlign(CharUnits IA) { - assert(isIndirect() && "Invalid kind!"); + assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); IndirectAlign = IA.getQuantity(); } @@ -355,12 +385,22 @@ IndirectByVal = IBV; } + unsigned getIndirectAddrSpace() const { + assert(isIndirectAliased() && "Invalid kind!"); + return IndirectAddrSpace; + } + + void setIndirectAddrSpace(unsigned AddrSpace) { + assert(isIndirectAliased() && "Invalid kind!"); + IndirectAddrSpace = AddrSpace; + } + bool getIndirectRealign() const { - assert(isIndirect() && "Invalid kind!"); + assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); return IndirectRealign; } void setIndirectRealign(bool IR) { - assert(isIndirect() && "Invalid kind!"); + assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); IndirectRealign = IR; } Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -1470,6 +1470,7 @@ break; } case ABIArgInfo::Indirect: + case ABIArgInfo::IndirectAliased: IRArgs.NumberOfArgs = 1; break; case ABIArgInfo::Ignore: @@ -1560,6 +1561,7 @@ const ABIArgInfo &retAI = FI.getReturnInfo(); switch (retAI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::IndirectAliased: llvm_unreachable("Invalid ABI kind for return argument"); case ABIArgInfo::Extend: @@ -1637,7 +1639,12 @@ CGM.getDataLayout().getAllocaAddrSpace()); break; } - + case ABIArgInfo::IndirectAliased: { + assert(NumIRArgs == 1); + llvm::Type *LTy = ConvertTypeForMem(it->type); + ArgTypes[FirstIRArg] = LTy->getPointerTo(ArgInfo.getIndirectAddrSpace()); + break; + } case ABIArgInfo::Extend: case ABIArgInfo::Direct: { // Fast-isel and the optimizer generally like scalar values better than @@ -2101,6 +2108,7 @@ break; case ABIArgInfo::Expand: + case ABIArgInfo::IndirectAliased: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -2184,6 +2192,9 @@ if (AI.getIndirectByVal()) Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType)); + // TODO: We could add the byref attribute if not byval, but it would + // require updating many testcases. + CharUnits Align = AI.getIndirectAlign(); // In a byval argument, it is important that the required @@ -2206,6 +2217,13 @@ // byval disables readnone and readonly. FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) .removeAttribute(llvm::Attribute::ReadNone); + + break; + } + case ABIArgInfo::IndirectAliased: { + CharUnits Align = AI.getIndirectAlign(); + Attrs.addByRefAttr(getTypes().ConvertTypeForMem(ParamType)); + Attrs.addAlignmentAttr(Align.getQuantity()); break; } case ABIArgInfo::Ignore: @@ -2434,16 +2452,19 @@ break; } - case ABIArgInfo::Indirect: { + case ABIArgInfo::Indirect: + case ABIArgInfo::IndirectAliased: { assert(NumIRArgs == 1); Address ParamAddr = Address(Fn->getArg(FirstIRArg), ArgI.getIndirectAlign()); if (!hasScalarEvaluationKind(Ty)) { - // Aggregates and complex variables are accessed by reference. All we - // need to do is realign the value, if requested. + // Aggregates and complex variables are accessed by reference. All we + // need to do is realign the value, if requested. Also, if the address + // may be aliased, copy it to ensure that the parameter variable is + // mutable and has a unique adress, as C requires. Address V = ParamAddr; - if (ArgI.getIndirectRealign()) { + if (ArgI.getIndirectRealign() || ArgI.isIndirectAliased()) { Address AlignedTemp = CreateMemTemp(Ty, "coerce"); // Copy from the incoming argument pointer to the temporary with the @@ -3285,8 +3306,8 @@ } break; } - case ABIArgInfo::Expand: + case ABIArgInfo::IndirectAliased: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -4413,7 +4434,8 @@ break; } - case ABIArgInfo::Indirect: { + case ABIArgInfo::Indirect: + case ABIArgInfo::IndirectAliased: { assert(NumIRArgs == 1); if (!I->isAggregate()) { // Make a temporary alloca to pass the argument. @@ -4668,12 +4690,13 @@ break; } - case ABIArgInfo::Expand: + case ABIArgInfo::Expand: { unsigned IRArgPos = FirstIRArg; ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos); assert(IRArgPos == FirstIRArg + NumIRArgs); break; } + } } const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this); @@ -5084,6 +5107,7 @@ } case ABIArgInfo::Expand: + case ABIArgInfo::IndirectAliased: llvm_unreachable("Invalid ABI kind for return argument"); } Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -257,6 +257,11 @@ << " ByVal=" << getIndirectByVal() << " Realign=" << getIndirectRealign(); break; + case IndirectAliased: + OS << "Indirect Align=" << getIndirectAlign().getQuantity() + << " AadrSpace=" << getIndirectAddrSpace() + << " Realign=" << getIndirectRealign(); + break; case Expand: OS << "Expand"; break; @@ -1989,6 +1994,7 @@ case ABIArgInfo::InAlloca: return true; case ABIArgInfo::Ignore: + case ABIArgInfo::IndirectAliased: return false; case ABIArgInfo::Indirect: case ABIArgInfo::Direct: @@ -8790,18 +8796,31 @@ // TODO: Can we omit empty structs? - llvm::Type *LTy = nullptr; if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) - LTy = CGT.ConvertType(QualType(SeltTy, 0)); + Ty = QualType(SeltTy, 0); + llvm::Type *OrigLTy = CGT.ConvertType(Ty); + llvm::Type *LTy = OrigLTy; if (getContext().getLangOpts().HIP) { - if (!LTy) - LTy = CGT.ConvertType(Ty); LTy = coerceKernelArgumentType( - LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default), + OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default), /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device)); } + // FIXME: Should also use this for OpenCL, but it requires addressing the + // problem of kernels being called. + // + // FIXME: This doesn't apply the optimization of coercing pointers in structs + // to global address space when using byref. This would require implementing a + // new kind of coercion of the in-memory type when for indirect arguments. + if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy && + isAggregateTypeForABI(Ty)) { + return ABIArgInfo::getIndirectAliased( + getContext().getTypeAlignInChars(Ty), + getContext().getTargetAddressSpace(LangAS::opencl_constant), + false /*Realign*/, nullptr /*Padding*/); + } + // If we set CanBeFlattened to true, CodeGen will expand the struct to its // individual elements, which confuses the Clover OpenCL backend; therefore we // have to set it to false here. Other args of getDirect() are just defaults. @@ -9377,6 +9396,7 @@ } case ABIArgInfo::Indirect: + case ABIArgInfo::IndirectAliased: Stride = SlotSize; ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect"); ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), @@ -9742,6 +9762,7 @@ ArgSize = ArgSize.alignTo(SlotSize); break; case ABIArgInfo::Indirect: + case ABIArgInfo::IndirectAliased: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); Val = Address(Builder.CreateLoad(Val), TypeAlign); ArgSize = SlotSize; Index: clang/test/CodeGenCUDA/kernel-args.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-args.cu +++ clang/test/CodeGenCUDA/kernel-args.cu @@ -8,14 +8,14 @@ int a[32]; }; -// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce) +// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}) // NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x) __global__ void kernel(A x) { } class Kernel { public: - // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce) + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}) // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x) static __global__ void memberKernel(A x){} template static __global__ void templateMemberKernel(T x) {} @@ -29,11 +29,11 @@ void test() { Kernel K; - // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce) + // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}} // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x) launch((void*)templateKernel); - // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce) + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}} // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x) launch((void*)Kernel::templateMemberKernel); } Index: clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -67,7 +67,6 @@ int i2; } struct_of_structs_arg_t; -// CHECK: %union.transparent_u = type { i32 } typedef union { int b1; @@ -237,7 +236,7 @@ // CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce) __kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { } -// CHECK: void @test_kernel_transparent_union_arg(%union.transparent_u %u.coerce) +// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce) __kernel void test_kernel_transparent_union_arg(transparent_u u) { } // CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)