Index: include/clang-c/Index.h =================================================================== --- include/clang-c/Index.h +++ include/clang-c/Index.h @@ -3022,7 +3022,7 @@ CXCallingConv_X86Pascal = 5, CXCallingConv_AAPCS = 6, CXCallingConv_AAPCS_VFP = 7, - /* Value 8 was PnaclCall, but it was never used, so it could safely be re-used. */ + CXCallingConv_X86RegCall = 8, CXCallingConv_IntelOclBicc = 9, CXCallingConv_X86_64Win64 = 10, CXCallingConv_X86_64SysV = 11, Index: include/clang/AST/Type.h =================================================================== --- include/clang/AST/Type.h +++ include/clang/AST/Type.h @@ -1378,7 +1378,7 @@ /// Extra information which affects how the function is called, like /// regparm and the calling convention. - unsigned ExtInfo : 9; + unsigned ExtInfo : 10; /// Used only by FunctionProtoType, put here to pack with the /// other bitfields. @@ -2907,19 +2907,19 @@ // * AST read and write // * Codegen class ExtInfo { - // Feel free to rearrange or add bits, but if you go over 9, + // Feel free to rearrange or add bits, but if you go over 10, // you'll need to adjust both the Bits field below and // Type::FunctionTypeBitfields. // | CC |noreturn|produces|regparm| - // |0 .. 3| 4 | 5 | 6 .. 8| + // |0 .. 4| 5 | 6 | 7 .. 9| // // regparm is either 0 (no regparm attribute) or the regparm value+1. 
- enum { CallConvMask = 0xF }; - enum { NoReturnMask = 0x10 }; - enum { ProducesResultMask = 0x20 }; + enum { CallConvMask = 0x1F }; + enum { NoReturnMask = 0x20 }; + enum { ProducesResultMask = 0x40 }; enum { RegParmMask = ~(CallConvMask | NoReturnMask | ProducesResultMask), - RegParmOffset = 6 }; // Assumed to be the last field + RegParmOffset = 7 }; // Assumed to be the last field uint16_t Bits; @@ -3800,6 +3800,7 @@ attr_fastcall, attr_stdcall, attr_thiscall, + attr_regcall, attr_pascal, attr_swiftcall, attr_vectorcall, Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -810,6 +810,11 @@ let Documentation = [FastCallDocs]; } +def RegCall : InheritableAttr { + let Spellings = [GCC<"regcall">, Keyword<"__regcall">]; + let Documentation = [RegCallDocs]; +} + def Final : InheritableAttr { let Spellings = [Keyword<"final">, Keyword<"sealed">]; let Accessors = [Accessor<"isSpelledAsSealed", [Keyword<"sealed">]>]; Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -1256,6 +1256,18 @@ }]; } +def RegCallDocs : Documentation { + let Category = DocCatCallingConvs; + let Content = [{ +On x86 targets, this attribute changes the calling convention to +`__regcall`_ convention. This convention aims to pass as many arguments +as possible in registers. It also tries to utilize registers for the +return value whenever it is possible. + +.. 
_`__regcall`: https://software.intel.com/en-us/node/693069 + }]; +} + def ThisCallDocs : Documentation { let Category = DocCatCallingConvs; let Content = [{ Index: include/clang/Basic/Specifiers.h =================================================================== --- include/clang/Basic/Specifiers.h +++ include/clang/Basic/Specifiers.h @@ -237,6 +237,7 @@ CC_X86Pascal, // __attribute__((pascal)) CC_X86_64Win64, // __attribute__((ms_abi)) CC_X86_64SysV, // __attribute__((sysv_abi)) + CC_X86RegCall, // __attribute__((regcall)) CC_AAPCS, // __attribute__((pcs("aapcs"))) CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) @@ -254,6 +255,7 @@ case CC_X86StdCall: case CC_X86FastCall: case CC_X86ThisCall: + case CC_X86RegCall: case CC_X86Pascal: case CC_X86VectorCall: case CC_SpirFunction: Index: include/clang/Basic/TokenKinds.def =================================================================== --- include/clang/Basic/TokenKinds.def +++ include/clang/Basic/TokenKinds.def @@ -502,6 +502,7 @@ KEYWORD(__stdcall , KEYALL) KEYWORD(__fastcall , KEYALL) KEYWORD(__thiscall , KEYALL) +KEYWORD(__regcall , KEYALL) KEYWORD(__vectorcall , KEYALL) KEYWORD(__forceinline , KEYMS) KEYWORD(__unaligned , KEYMS) Index: lib/AST/Expr.cpp =================================================================== --- lib/AST/Expr.cpp +++ lib/AST/Expr.cpp @@ -543,6 +543,7 @@ case CC_X86FastCall: POut << "__fastcall "; break; case CC_X86ThisCall: POut << "__thiscall "; break; case CC_X86VectorCall: POut << "__vectorcall "; break; + case CC_X86RegCall: POut << "__regcall "; break; // Only bother printing the conventions that MSVC knows about. 
default: break; } Index: lib/AST/ItaniumMangle.cpp =================================================================== --- lib/AST/ItaniumMangle.cpp +++ lib/AST/ItaniumMangle.cpp @@ -493,6 +493,7 @@ void mangleUnscopedTemplateName(TemplateName, const AbiTagList *AdditionalAbiTags); void mangleSourceName(const IdentifierInfo *II); + void mangleRegCallName(const IdentifierInfo *II); void mangleSourceNameWithAbiTags( const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr); void mangleLocalName(const Decl *D, @@ -1241,7 +1242,15 @@ getEffectiveDeclContext(ND)->isFileContext()) Out << 'L'; - mangleSourceName(II); + auto *FD = dyn_cast<FunctionDecl>(ND); + bool IsRegCall = FD && + FD->getType()->castAs<FunctionType>()->getCallConv() == + clang::CC_X86RegCall; + if (IsRegCall) + mangleRegCallName(II); + else + mangleSourceName(II); + writeAbiTags(ND, AdditionalAbiTags); break; } @@ -1415,6 +1424,14 @@ } } +void CXXNameMangler::mangleRegCallName(const IdentifierInfo *II) { + // <source-name> ::= <positive length number> __regcall3__ <identifier> + // <number> ::= [n] <non-negative decimal integer> + // <identifier> ::= <unqualified source code identifier> + Out << II->getLength() + sizeof("__regcall3__") - 1 << "__regcall3__" + << II->getName(); +} + void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) { // <source-name> ::= <positive length number> <identifier> // <number> ::= [n] <non-negative decimal integer> @@ -2481,6 +2498,7 @@ case CC_X86Pascal: case CC_X86_64Win64: case CC_X86_64SysV: + case CC_X86RegCall: case CC_AAPCS: case CC_AAPCS_VFP: case CC_IntelOclBicc: Index: lib/AST/Mangle.cpp =================================================================== --- lib/AST/Mangle.cpp +++ lib/AST/Mangle.cpp @@ -52,6 +52,7 @@ enum CCMangling { CCM_Other, CCM_Fast, + CCM_RegCall, CCM_Vector, CCM_Std }; @@ -92,6 +93,8 @@ return CCM_Std; case CC_X86VectorCall: return CCM_Vector; + case CC_X86RegCall: + return CCM_RegCall; } } @@ -152,6 +155,8 @@ Out << '_'; else if (CC == CCM_Fast) Out << '@'; + else if (CC == CCM_RegCall) + Out << "__regcall3__"; if (!MCXX) Out << D->getIdentifier()->getName(); Index: lib/AST/MicrosoftMangle.cpp =================================================================== --- 
lib/AST/MicrosoftMangle.cpp +++ lib/AST/MicrosoftMangle.cpp @@ -2003,6 +2003,7 @@ // ::= I # __fastcall // ::= J # __export __fastcall // ::= Q # __vectorcall + // ::= w # __regcall // The 'export' calling conventions are from a bygone era // (*cough*Win16*cough*) when functions were declared for export with // that keyword. (It didn't actually export them, it just made them so @@ -2020,6 +2021,7 @@ case CC_X86StdCall: Out << 'G'; break; case CC_X86FastCall: Out << 'I'; break; case CC_X86VectorCall: Out << 'Q'; break; + case CC_X86RegCall: Out << 'w'; break; } } void MicrosoftCXXNameMangler::mangleCallingConvention(const FunctionType *T) { Index: lib/AST/Type.cpp =================================================================== --- lib/AST/Type.cpp +++ lib/AST/Type.cpp @@ -2667,6 +2667,7 @@ case CC_X86VectorCall: return "vectorcall"; case CC_X86_64Win64: return "ms_abi"; case CC_X86_64SysV: return "sysv_abi"; + case CC_X86RegCall : return "regcall"; case CC_AAPCS: return "aapcs"; case CC_AAPCS_VFP: return "aapcs-vfp"; case CC_IntelOclBicc: return "intel_ocl_bicc"; @@ -3034,6 +3035,7 @@ case AttributedType::attr_fastcall: case AttributedType::attr_stdcall: case AttributedType::attr_thiscall: + case AttributedType::attr_regcall: case AttributedType::attr_pascal: case AttributedType::attr_swiftcall: case AttributedType::attr_vectorcall: @@ -3091,6 +3093,7 @@ case attr_fastcall: case attr_stdcall: case attr_thiscall: + case attr_regcall: case attr_swiftcall: case attr_vectorcall: case attr_pascal: Index: lib/AST/TypePrinter.cpp =================================================================== --- lib/AST/TypePrinter.cpp +++ lib/AST/TypePrinter.cpp @@ -725,6 +725,9 @@ case CC_X86_64SysV: OS << " __attribute__((sysv_abi))"; break; + case CC_X86RegCall: + OS << " __attribute__((regcall))"; + break; case CC_SpirFunction: case CC_OpenCLKernel: // Do nothing. These CCs are not available as attributes. 
@@ -1339,6 +1342,7 @@ case AttributedType::attr_pascal: OS << "pascal"; break; case AttributedType::attr_ms_abi: OS << "ms_abi"; break; case AttributedType::attr_sysv_abi: OS << "sysv_abi"; break; + case AttributedType::attr_regcall: OS << "regcall"; break; case AttributedType::attr_pcs: case AttributedType::attr_pcs_vfp: { OS << "pcs("; Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -2893,6 +2893,7 @@ case CC_X86FastCall: case CC_X86StdCall: case CC_X86VectorCall: + case CC_X86RegCall: case CC_C: case CC_Swift: case CC_X86Pascal: @@ -4480,6 +4481,7 @@ case CC_X86_64Win64: case CC_PreserveMost: case CC_PreserveAll: + case CC_X86RegCall: return CCCR_OK; default: return CCCR_Warning; @@ -4552,6 +4554,7 @@ case CC_IntelOclBicc: case CC_X86_64SysV: case CC_Swift: + case CC_X86RegCall: return CCCR_OK; default: return CCCR_Warning; Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -48,6 +48,7 @@ default: return llvm::CallingConv::C; case CC_X86StdCall: return llvm::CallingConv::X86_StdCall; case CC_X86FastCall: return llvm::CallingConv::X86_FastCall; + case CC_X86RegCall: return llvm::CallingConv::X86_RegCall; case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall; case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64; case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV; @@ -173,6 +174,9 @@ if (D->hasAttr<FastCallAttr>()) return CC_X86FastCall; + if (D->hasAttr<RegCallAttr>()) + return CC_X86RegCall; + if (D->hasAttr<ThisCallAttr>()) return CC_X86ThisCall; Index: lib/CodeGen/CGDebugInfo.cpp =================================================================== --- lib/CodeGen/CGDebugInfo.cpp +++ lib/CodeGen/CGDebugInfo.cpp @@ -904,6 +904,7 @@ case CC_Swift: case CC_PreserveMost: case CC_PreserveAll: + case CC_X86RegCall: return 0; } return 0; Index: lib/CodeGen/CodeGenModule.cpp 
=================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -673,7 +673,16 @@ } else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); - Str = II->getName(); + const auto *FD = dyn_cast<FunctionDecl>(ND); + + if (FD && + FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) { + llvm::raw_svector_ostream Out(Buffer); + Out << "__regcall3__" << II->getName(); + Str = Out.str(); + } else { + Str = II->getName(); + } } // Keep the first result in the case of a mangling collision. Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -1229,7 +1229,8 @@ const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(RetTy, Base, NumElts)) { // The LLVM struct type for such an aggregate should lower properly. return ABIArgInfo::getDirect(); @@ -1443,7 +1444,8 @@ return true; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs) NeedsPadding = true; @@ -1461,7 +1463,8 @@ return false; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) > 32) return false; @@ -1494,7 +1497,8 @@ // to other targets. 
const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; @@ -1540,7 +1544,8 @@ (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall, + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall, PaddingType); return getIndirectResult(Ty, true, State); @@ -1591,7 +1596,10 @@ State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) State.FreeRegs = FI.getRegParm(); - else + else if (State.CC == llvm::CallingConv::X86_RegCall) { + State.FreeRegs = 5; + State.FreeSSERegs = 8; + } else State.FreeRegs = DefaultNumRegisterParameters; if (!getCXXABI().classifyReturnType(FI)) { @@ -1932,12 +1940,16 @@ ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType Ty, - unsigned freeIntRegs, - unsigned &neededInt, - unsigned &neededSSE, + ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, + unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const; + ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const; + + ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const; + bool IsIllegalVectorType(QualType Ty) const; /// The 0.98 ABI revision clarified a lot of ambiguities, @@ -3283,22 +3295,94 @@ return ABIArgInfo::getDirect(ResType); } +ABIArgInfo +X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const { + auto RT = Ty->getAs<RecordType>(); + assert(RT && "classifyRegCallStructType only valid with struct types"); + + if 
(RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectReturnResult(Ty); + + // Sum up bases + if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { + if (CXXRD->isDynamicClass()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + + for (const auto &I : CXXRD->bases()) + if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + } + + // Sum up members + for (const auto *FD : RT->getDecl()->fields()) { + if (FD->getType()->isRecordType() && !FD->getType()->isUnionType()) { + if (classifyRegCallStructTypeImpl(FD->getType(), NeededInt, NeededSSE) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + } else { + unsigned LocalNeededInt, LocalNeededSSE; + if (classifyArgumentType(FD->getType(), UINT_MAX, LocalNeededInt, + LocalNeededSSE, true) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + NeededInt += LocalNeededInt; + NeededSSE += LocalNeededSSE; + } + } + + return ABIArgInfo::getDirect(); +} + +ABIArgInfo X86_64ABIInfo::classifyRegCallStructType(QualType Ty, + unsigned &NeededInt, + unsigned &NeededSSE) const { + + NeededInt = 0; + NeededSSE = 0; + + return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE); +} + void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; // Keep track of the number of assigned registers. - unsigned freeIntRegs = 6, freeSSERegs = 8; + unsigned FreeIntRegs = IsRegCall ? 11 : 6; + unsigned FreeSSERegs = IsRegCall ? 
16 : 8; + unsigned NeededInt, NeededSSE; + + if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && + !FI.getReturnType()->getTypePtr()->isUnionType()) { + FI.getReturnInfo() = + classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } else if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) - --freeIntRegs; + --FreeIntRegs; // The chain argument effectively gives us another free register. if (FI.isChainCall()) - ++freeIntRegs; + ++FreeIntRegs; unsigned NumRequiredArgs = FI.getNumRequiredArgs(); // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers @@ -3308,19 +3392,21 @@ it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; - unsigned neededInt, neededSSE; - it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, - neededSSE, IsNamedArg); + if (IsRegCall && it->type->isStructureOrClassType()) + it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE); + else + it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, + NeededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the // stack. If registers have already been assigned for some // eightbytes of such an argument, the assignments get reverted. 
- if (freeIntRegs >= neededInt && freeSSERegs >= neededSSE) { - freeIntRegs -= neededInt; - freeSSERegs -= neededSSE; + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; } else { - it->info = getIndirectResult(it->type, freeIntRegs); + it->info = getIndirectResult(it->type, FreeIntRegs); } } } @@ -3643,14 +3729,27 @@ void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; + bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; + + unsigned FreeSSERegs = 0; + if (IsVectorCall) { + // We can use up to 4 SSE return registers with vectorcall. + FreeSSERegs = 4; + } else if (IsRegCall) { + // RegCall gives us 16 SSE registers. + FreeSSERegs = 16; + } - // We can use up to 4 SSE return registers with vectorcall. - unsigned FreeSSERegs = IsVectorCall ? 4 : 0; if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); - // We can use up to 6 SSE register parameters with vectorcall. - FreeSSERegs = IsVectorCall ? 6 : 0; + if (IsVectorCall) { + // We can use up to 6 SSE register parameters with vectorcall. 
+ FreeSSERegs = 6; + } else if (IsRegCall) { + FreeSSERegs = 16; + } + for (auto &I : FI.arguments()) I.info = classify(I.type, FreeSSERegs, false); } Index: lib/Parse/ParseDecl.cpp =================================================================== --- lib/Parse/ParseDecl.cpp +++ lib/Parse/ParseDecl.cpp @@ -605,6 +605,7 @@ case tok::kw___fastcall: case tok::kw___stdcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___cdecl: case tok::kw___vectorcall: case tok::kw___ptr64: @@ -3137,6 +3138,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: ParseMicrosoftTypeAttributes(DS.getAttributes()); continue; @@ -4454,6 +4456,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___w64: case tok::kw___ptr64: @@ -4638,6 +4641,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___w64: case tok::kw___sptr: @@ -4876,6 +4880,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: if (AttrReqs & AR_DeclspecAttributesParsed) { ParseMicrosoftTypeAttributes(DS.getAttributes()); Index: lib/Parse/ParseTentative.cpp =================================================================== --- lib/Parse/ParseTentative.cpp +++ lib/Parse/ParseTentative.cpp @@ -909,7 +909,7 @@ // '(' abstract-declarator ')' if (Tok.isOneOf(tok::kw___attribute, tok::kw___declspec, tok::kw___cdecl, tok::kw___stdcall, tok::kw___fastcall, tok::kw___thiscall, - tok::kw___vectorcall)) + tok::kw___regcall, tok::kw___vectorcall)) return TPResult::True; // attributes indicate declaration TPResult TPR = TryParseDeclarator(mayBeAbstract, mayHaveIdentifier); if (TPR != TPResult::Ambiguous) @@ -1058,6 +1058,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case 
tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___unaligned: case tok::kw___vector: @@ -1351,6 +1352,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___w64: case tok::kw___sptr: Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -3839,6 +3839,10 @@ SysVABIAttr(Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex())); return; + case AttributeList::AT_RegCall: + D->addAttr(::new (S.Context) RegCallAttr( + Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex())); + return; case AttributeList::AT_Pcs: { PcsAttr::PCSType PCS; switch (CC) { @@ -3900,6 +3904,7 @@ case AttributeList::AT_Pascal: CC = CC_X86Pascal; break; case AttributeList::AT_SwiftCall: CC = CC_Swift; break; case AttributeList::AT_VectorCall: CC = CC_X86VectorCall; break; + case AttributeList::AT_RegCall: CC = CC_X86RegCall; break; case AttributeList::AT_MSABI: CC = Context.getTargetInfo().getTriple().isOSWindows() ? 
CC_C : CC_X86_64Win64; @@ -5865,6 +5870,7 @@ case AttributeList::AT_FastCall: case AttributeList::AT_ThisCall: case AttributeList::AT_Pascal: + case AttributeList::AT_RegCall: case AttributeList::AT_SwiftCall: case AttributeList::AT_VectorCall: case AttributeList::AT_MSABI: Index: lib/Sema/SemaType.cpp =================================================================== --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -106,6 +106,7 @@ case AttributeList::AT_FastCall: \ case AttributeList::AT_StdCall: \ case AttributeList::AT_ThisCall: \ + case AttributeList::AT_RegCall: \ case AttributeList::AT_Pascal: \ case AttributeList::AT_SwiftCall: \ case AttributeList::AT_VectorCall: \ @@ -4737,6 +4738,8 @@ return AttributeList::AT_StdCall; case AttributedType::attr_thiscall: return AttributeList::AT_ThisCall; + case AttributedType::attr_regcall: + return AttributeList::AT_RegCall; case AttributedType::attr_pascal: return AttributeList::AT_Pascal; case AttributedType::attr_swiftcall: @@ -6073,6 +6076,8 @@ return AttributedType::attr_stdcall; case AttributeList::AT_ThisCall: return AttributedType::attr_thiscall; + case AttributeList::AT_RegCall: + return AttributedType::attr_regcall; case AttributeList::AT_Pascal: return AttributedType::attr_pascal; case AttributeList::AT_SwiftCall: Index: test/CodeGen/regcall.c =================================================================== --- test/CodeGen/regcall.c +++ test/CodeGen/regcall.c @@ -0,0 +1,120 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=Win32 +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=Win64 +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-linux-gnu | FileCheck %s --check-prefix=Lin32 +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=Lin64 + +#include <xmmintrin.h> + +void __regcall v1(int a, int b) {} +// Win32: define 
x86_regcallcc void @"\01__regcall3__v1@8"(i32 inreg %a, i32 inreg %b) +// Win64: define x86_regcallcc void @"\01__regcall3__v1@16"(i32 %a, i32 %b) +// Lin32: define x86_regcallcc void @__regcall3__v1(i32 inreg %a, i32 inreg %b) +// Lin64: define x86_regcallcc void @__regcall3__v1(i32 %a, i32 %b) + +void __attribute__((regcall)) v1b(int a, int b) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v1b@8"(i32 inreg %a, i32 inreg %b) +// Win64: define x86_regcallcc void @"\01__regcall3__v1b@16"(i32 %a, i32 %b) +// Lin32: define x86_regcallcc void @__regcall3__v1b(i32 inreg %a, i32 inreg %b) +// Lin64: define x86_regcallcc void @__regcall3__v1b(i32 %a, i32 %b) + +void __regcall v2(char a, char b) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v2@8"(i8 inreg signext %a, i8 inreg signext %b) +// Win64: define x86_regcallcc void @"\01__regcall3__v2@16"(i8 %a, i8 %b) +// Lin32: define x86_regcallcc void @__regcall3__v2(i8 inreg signext %a, i8 inreg signext %b) +// Lin64: define x86_regcallcc void @__regcall3__v2(i8 signext %a, i8 signext %b) + +struct Small { int x; }; +void __regcall v3(int a, struct Small b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v3@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__v3@24"(i32 %a, i32 %b.coerce, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__v3(i32 inreg %a, i32 inreg, i32 %b.0, i32 inreg %c) +// Lin64: define x86_regcallcc void @__regcall3__v3(i32 %a, i32 %b.coerce, i32 %c) + +struct Large { int a[5]; }; +void __regcall v4(int a, struct Large b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v4@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__v4@40"(i32 %a, %struct.Large* %b, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 %c) +// Lin64: define x86_regcallcc void @__regcall3__v4(i32 %a, [5 x 
i32] %b.coerce, i32 %c) + +struct HFA2 { double x, y; }; +struct HFA4 { double w, x, y, z; }; +struct HFA5 { double v, w, x, y, z; }; + +void __regcall hfa1(int a, struct HFA4 b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa1@40"(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa1@48"(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__hfa1(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c) +// Lin64: define x86_regcallcc void @__regcall3__hfa1(i32 %a, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, i32 %c) + +// On 32-bit targets the two HFA4 arguments use up all eight SSE registers, so +// the trailing double is passed indirectly. 64-bit targets have sixteen SSE +// registers, so every argument here is passed directly. +void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa2@72"(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa2@72"(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double %c) +// Lin32: define x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg) +// Lin64: define x86_regcallcc void @__regcall3__hfa2(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, double %c) + +// Ensure that we pass builtin types directly while counting them against the +// SSE register usage. 
+void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa3@56"(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa3@56"(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) +// Lin32: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) +// Lin64: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.coerce0, double %f.coerce1) + +// Aggregates with more than four elements are not HFAs and are passed byval. +// Because they are not classified as homogeneous, they don't get special +// handling to ensure alignment. +void __regcall hfa4(struct HFA5 a) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa4@40"(%struct.HFA5* byval align 4) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa4@40"(%struct.HFA5* %a) +// Lin32: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4 %a) +// Lin64: define x86_regcallcc void @__regcall3__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4) + +// Return HFAs of 4 or fewer elements in registers. 
+static struct HFA2 g_hfa2; +struct HFA2 __regcall hfa5(void) { return g_hfa2; } +// Win32: define x86_regcallcc %struct.HFA2 @"\01__regcall3__hfa5@0"() +// Win64: define x86_regcallcc %struct.HFA2 @"\01__regcall3__hfa5@0"() +// Lin32: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5() +// Lin64: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5() + +typedef float __attribute__((vector_size(16))) v4f32; +struct HVA2 { v4f32 x, y; }; +struct HVA4 { v4f32 w, x, y, z; }; + +void __regcall hva1(int a, struct HVA4 b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hva1@72"(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__hva1@80"(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__hva1(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c) +// Lin64: define x86_regcallcc void @__regcall3__hva1(i32 %a, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, i32 %c) + +void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hva2@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg) +// Win64: define x86_regcallcc void @"\01__regcall3__hva2@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c) +// Lin32: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg) +// Lin64: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.coerce0, <4 x float> 
%a.coerce1, <4 x float> %a.coerce2, <4 x float> %a.coerce3, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, <4 x float> %c) + +void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hva3@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) +// Win64: define x86_regcallcc void @"\01__regcall3__hva3@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) +// Lin32: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) +// Lin64: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.coerce0, <4 x float> %f.coerce1) + +typedef float __attribute__((ext_vector_type(3))) v3f32; +struct OddSizeHVA { v3f32 x, y; }; + +void __regcall odd_size_hva(struct OddSizeHVA a) {} +// Win32: define x86_regcallcc void @"\01__regcall3__odd_size_hva@32"(<3 x float> %a.0, <3 x float> %a.1) +// Win64: define x86_regcallcc void @"\01__regcall3__odd_size_hva@32"(<3 x float> %a.0, <3 x float> %a.1) +// Lin32: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1) +// Lin64: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.coerce0, <3 x float> %a.coerce1) + +struct HFA6 { __m128 f[4]; }; +struct HFA6 __regcall ret_reg_reused(struct HFA6 a, struct HFA6 b, struct HFA6 c, struct HFA6 d){ struct HFA6 h; return h;} +// Win32: define x86_regcallcc %struct.HFA6 @"\01__regcall3__ret_reg_reused@256"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, %struct.HFA6* inreg %c, %struct.HFA6* inreg %d) +// Win64: define 
x86_regcallcc %struct.HFA6 @"\01__regcall3__ret_reg_reused@256"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3) +// Lin32: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, %struct.HFA6* inreg %c, %struct.HFA6* inreg %d) +// Lin64: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused([4 x <4 x float>] %a.coerce, [4 x <4 x float>] %b.coerce, [4 x <4 x float>] %c.coerce, [4 x <4 x float>] %d.coerce) Index: test/CodeGenCXX/regcall.cpp =================================================================== --- test/CodeGenCXX/regcall.cpp +++ test/CodeGenCXX/regcall.cpp @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -std=c++11 %s -o - | FileCheck -check-prefix=CHECK-LIN -check-prefix=CHECK-LIN64 %s +// RUN: %clang_cc1 -triple i386-linux-gnu -emit-llvm -std=c++11 %s -o - | FileCheck -check-prefix=CHECK-LIN -check-prefix=CHECK-LIN32 %s +// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 %s -o - -DWIN_TEST | FileCheck -check-prefix=CHECK-WIN64 %s +// RUN: %clang_cc1 -triple i386-windows-msvc -emit-llvm -std=c++11 %s -o - -DWIN_TEST | FileCheck -check-prefix=CHECK-WIN32 %s + +int __regcall foo(int i); + +int main() +{ + int p = 0, _data; + auto lambda = [&](int parameter) -> int { + _data = foo(parameter); + return _data; + }; + return lambda(p); +} +// CHECK-LIN: call x86_regcallcc {{.+}} @_Z15__regcall3__foo +// CHECK-WIN64: call x86_regcallcc {{.+}} @"\01?foo@@YwHH@Z" +// CHECK-WIN32: call x86_regcallcc {{.+}} @"\01?foo@@YwHH@Z" + +int __regcall foo (int i){ + return i; +} +// CHECK-LIN: define x86_regcallcc {{.+}}@_Z15__regcall3__foo 
+// CHECK-WIN64: define x86_regcallcc {{.+}}@"\01?foo@@YwHH@Z" +// CHECK-WIN32: define x86_regcallcc {{.+}}@"\01?foo@@YwHH@Z" + +// used to give a body to test_class functions +static int x = 0; +class test_class { + int a; +public: +#ifndef WIN_TEST + __regcall +#endif + test_class(){++x;} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC1Ev + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC2Ev + // Windows ignores calling convention on constructor/destructors. + // CHECK-WIN64-DAG: define linkonce_odr %class.test_class* @"\01??0test_class@@QEAA@XZ" + // CHECK-WIN32-DAG: define linkonce_odr x86_thiscallcc %class.test_class* @"\01??0test_class@@QAE@XZ" + +#ifndef WIN_TEST + __regcall +#endif + ~test_class(){--x;} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD2Ev + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD1Ev + // Windows ignores calling convention on constructor/destructors. 
+ // CHECK-WIN64-DAG: define linkonce_odr void @"\01??_Dtest_class@@QEAA@XZ" + // CHECK-WIN32-DAG: define linkonce_odr x86_thiscallcc void @"\01??_Dtest_class@@QAE@XZ" + + test_class& __regcall operator+=(const test_class&){ + return *this; + } + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc dereferenceable(4) %class.test_class* @_ZN10test_classpLERKS_ + // CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc dereferenceable(4) %class.test_class* @"\01??Ytest_class@@QEAwAEAV0@AEBV0@@Z" + // CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc dereferenceable(4) %class.test_class* @"\01??Ytest_class@@QAwAAV0@ABV0@@Z" + void __regcall do_thing(){} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall3__do_thingEv + // CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc void @"\01?do_thing@test_class@@QEAwXXZ" + // CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc void @"\01?do_thing@test_class@@QAwXXZ" + + template <typename T> + void __regcall tempFunc(T i){} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall3__tempFuncIiEEvT_ + // CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc void @"\01??$tempFunc@H@test_class@@QEAwXH@Z" + // CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc void @"\01??$tempFunc@H@test_class@@QAwXH@Z" +}; + +bool __regcall operator ==(const test_class&, const test_class&){ --x; return false;} +// CHECK-LIN-DAG: define x86_regcallcc zeroext i1 @_ZeqRK10test_classS1_ +// CHECK-WIN64-DAG: define x86_regcallcc zeroext i1 @"\01??8@Yw_NAEBVtest_class@@0@Z" +// CHECK-WIN32-DAG: define x86_regcallcc zeroext i1 @"\01??8@Yw_NABVtest_class@@0@Z" + +test_class __regcall operator""_test_class (unsigned long long) { ++x; return test_class{};} +// CHECK-LIN64-DAG: define x86_regcallcc %class.test_class @_Zli11_test_classy(i64) +// CHECK-LIN32-DAG: define x86_regcallcc void @_Zli11_test_classy(%class.test_class* inreg noalias sret %agg.result, i64) +// CHECK-WIN64-DAG: \01??__K_test_class@@Yw?AVtest_class@@_K@Z" +// 
CHECK-WIN32-DAG: \01??__K_test_class@@Yw?AVtest_class@@_K@Z" + +template <typename T> +void __regcall freeTempFunc(T i){} +// CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_Z24__regcall3__freeTempFuncIiEvT_ +// CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z" +// CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z" + +// function to force generation of functions +void force_gen() { + test_class t; + test_class t2 = 12_test_class; + t += t2; + auto t3 = 100_test_class; + t3.tempFunc(1); + freeTempFunc(1); + t3.do_thing(); +} Index: tools/libclang/CXType.cpp =================================================================== --- tools/libclang/CXType.cpp +++ tools/libclang/CXType.cpp @@ -531,6 +531,7 @@ TCALLINGCONV(X86FastCall); TCALLINGCONV(X86ThisCall); TCALLINGCONV(X86Pascal); + TCALLINGCONV(X86RegCall); TCALLINGCONV(X86VectorCall); TCALLINGCONV(X86_64Win64); TCALLINGCONV(X86_64SysV);