Index: include/clang-c/Index.h =================================================================== --- include/clang-c/Index.h +++ include/clang-c/Index.h @@ -3026,6 +3026,7 @@ CXCallingConv_Swift = 13, CXCallingConv_PreserveMost = 14, CXCallingConv_PreserveAll = 15, + CXCallingConv_X86RegCall = 16, CXCallingConv_Invalid = 100, CXCallingConv_Unexposed = 200 Index: include/clang/AST/Type.h =================================================================== --- include/clang/AST/Type.h +++ include/clang/AST/Type.h @@ -1378,7 +1378,7 @@ /// Extra information which affects how the function is called, like /// regparm and the calling convention. - unsigned ExtInfo : 9; + unsigned ExtInfo : 10; /// Used only by FunctionProtoType, put here to pack with the /// other bitfields. @@ -2906,19 +2906,19 @@ // * AST read and write // * Codegen class ExtInfo { - // Feel free to rearrange or add bits, but if you go over 9, + // Feel free to rearrange or add bits, but if you go over 10, // you'll need to adjust both the Bits field below and // Type::FunctionTypeBitfields. // | CC |noreturn|produces|regparm| - // |0 .. 3| 4 | 5 | 6 .. 8| + // |0 .. 4| 5 | 6 | 7 .. 9| // // regparm is either 0 (no regparm attribute) or the regparm value+1. 
- enum { CallConvMask = 0xF }; - enum { NoReturnMask = 0x10 }; - enum { ProducesResultMask = 0x20 }; + enum { CallConvMask = 0x1F }; + enum { NoReturnMask = 0x20 }; + enum { ProducesResultMask = 0x40 }; enum { RegParmMask = ~(CallConvMask | NoReturnMask | ProducesResultMask), - RegParmOffset = 6 }; // Assumed to be the last field + RegParmOffset = 7 }; // Assumed to be the last field uint16_t Bits; @@ -3792,6 +3792,7 @@ attr_fastcall, attr_stdcall, attr_thiscall, + attr_regcall, attr_pascal, attr_swiftcall, attr_vectorcall, Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -810,6 +810,12 @@ let Documentation = [FastCallDocs]; } +def RegCall : InheritableAttr { + let Spellings = [GCC<"regcall">, Keyword<"__regcall">, + Keyword<"_regcall">]; + let Documentation = [RegCallDocs]; +} + def Final : InheritableAttr { let Spellings = [Keyword<"final">, Keyword<"sealed">]; let Accessors = [Accessor<"isSpelledAsSealed", [Keyword<"sealed">]>]; Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -1256,6 +1256,18 @@ }]; } +def RegCallDocs : Documentation { + let Category = DocCatCallingConvs; + let Content = [{ +On x86 targets, this attribute changes the calling convention to +`__regcall`_ convention. This convention aims to pass as many arguments +as possible in registers. It also tries to utilize registers for the +return value whenever it is possible. + +.. 
_`__regcall`: https://software.intel.com/en-us/node/512847 + }]; +} + def ThisCallDocs : Documentation { let Category = DocCatCallingConvs; let Content = [{ Index: include/clang/Basic/Specifiers.h =================================================================== --- include/clang/Basic/Specifiers.h +++ include/clang/Basic/Specifiers.h @@ -237,6 +237,7 @@ CC_X86Pascal, // __attribute__((pascal)) CC_X86_64Win64, // __attribute__((ms_abi)) CC_X86_64SysV, // __attribute__((sysv_abi)) + CC_X86RegCall, // __attribute__((regcall)) CC_AAPCS, // __attribute__((pcs("aapcs"))) CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) @@ -254,6 +255,7 @@ case CC_X86StdCall: case CC_X86FastCall: case CC_X86ThisCall: + case CC_X86RegCall: case CC_X86Pascal: case CC_X86VectorCall: case CC_SpirFunction: Index: include/clang/Basic/TokenKinds.def =================================================================== --- include/clang/Basic/TokenKinds.def +++ include/clang/Basic/TokenKinds.def @@ -502,6 +502,7 @@ KEYWORD(__stdcall , KEYALL) KEYWORD(__fastcall , KEYALL) KEYWORD(__thiscall , KEYALL) +KEYWORD(__regcall , KEYALL) KEYWORD(__vectorcall , KEYALL) KEYWORD(__forceinline , KEYMS) KEYWORD(__unaligned , KEYMS) Index: lib/AST/Expr.cpp =================================================================== --- lib/AST/Expr.cpp +++ lib/AST/Expr.cpp @@ -543,7 +543,8 @@ case CC_X86FastCall: POut << "__fastcall "; break; case CC_X86ThisCall: POut << "__thiscall "; break; case CC_X86VectorCall: POut << "__vectorcall "; break; - // Only bother printing the conventions that MSVC knows about. + case CC_X86RegCall: POut << "__regcall "; break; + // Only bother printing the conventions that MSVC knows about. 
default: break; } } Index: lib/AST/ItaniumMangle.cpp =================================================================== --- lib/AST/ItaniumMangle.cpp +++ lib/AST/ItaniumMangle.cpp @@ -490,7 +490,7 @@ const AbiTagList *AdditionalAbiTags); void mangleUnscopedTemplateName(TemplateName, const AbiTagList *AdditionalAbiTags); - void mangleSourceName(const IdentifierInfo *II); + void mangleSourceName(const IdentifierInfo *II, bool isRegCall = false); void mangleSourceNameWithAbiTags( const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr); void mangleLocalName(const Decl *D, @@ -1231,7 +1231,12 @@ getEffectiveDeclContext(ND)->isFileContext()) Out << 'L'; - mangleSourceName(II); + auto FD = dyn_cast(ND); + bool isRegCall = (FD != nullptr) && + FD->getType()->castAs()->getCallConv() == + clang::CC_X86RegCall; + + mangleSourceName(II, isRegCall); writeAbiTags(ND, AdditionalAbiTags); break; } @@ -1405,11 +1410,16 @@ } } -void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) { - // ::= +void CXXNameMangler::mangleSourceName(const IdentifierInfo *II, bool isRegCall) { + // ::= [__regcall3__] // ::= [n] // ::= - Out << II->getLength() << II->getName(); + if (isRegCall) { + Out << II->getLength() + sizeof("__regcall3__") - 1<< "__regcall3__"; + } else { + Out << II->getLength(); + } + Out << II->getName(); } void CXXNameMangler::mangleNestedName(const NamedDecl *ND, @@ -2471,6 +2481,7 @@ case CC_X86Pascal: case CC_X86_64Win64: case CC_X86_64SysV: + case CC_X86RegCall: case CC_AAPCS: case CC_AAPCS_VFP: case CC_IntelOclBicc: Index: lib/AST/Mangle.cpp =================================================================== --- lib/AST/Mangle.cpp +++ lib/AST/Mangle.cpp @@ -52,6 +52,7 @@ enum CCMangling { CCM_Other, CCM_Fast, + CCM_RegCall, CCM_Vector, CCM_Std }; @@ -66,6 +67,7 @@ const NamedDecl *ND) { const TargetInfo &TI = Context.getTargetInfo(); const llvm::Triple &Triple = TI.getTriple(); + if (!Triple.isOSWindows() || !(Triple.getArch() == llvm::Triple::x86 || 
Triple.getArch() == llvm::Triple::x86_64)) @@ -92,6 +94,8 @@ return CCM_Std; case CC_X86VectorCall: return CCM_Vector; + case CC_X86RegCall: + return CCM_RegCall; } } @@ -152,6 +156,8 @@ Out << '_'; else if (CC == CCM_Fast) Out << '@'; + else if (CC == CCM_RegCall) + Out << "__regcall3__"; if (!MCXX) Out << D->getIdentifier()->getName(); Index: lib/AST/MicrosoftMangle.cpp =================================================================== --- lib/AST/MicrosoftMangle.cpp +++ lib/AST/MicrosoftMangle.cpp @@ -430,8 +430,14 @@ // ::= ? Out << Prefix; + + if (auto FD = dyn_cast(D)) + if (FD->getType()->castAs()->getCallConv() == + clang::CC_X86RegCall) + Out << "__regcall3__"; + mangleName(D); - if (const FunctionDecl *FD = dyn_cast(D)) + if (const FunctionDecl *FD = dyn_cast(D)) mangleFunctionEncoding(FD, Context.shouldMangleDeclName(FD)); else if (const VarDecl *VD = dyn_cast(D)) mangleVariableEncoding(VD); @@ -2020,6 +2026,8 @@ case CC_X86StdCall: Out << 'G'; break; case CC_X86FastCall: Out << 'I'; break; case CC_X86VectorCall: Out << 'Q'; break; + // Regcall uses a different style of mangling. 
+ case CC_X86RegCall: break; } } void MicrosoftCXXNameMangler::mangleCallingConvention(const FunctionType *T) { Index: lib/AST/Type.cpp =================================================================== --- lib/AST/Type.cpp +++ lib/AST/Type.cpp @@ -2658,6 +2658,7 @@ case CC_X86VectorCall: return "vectorcall"; case CC_X86_64Win64: return "ms_abi"; case CC_X86_64SysV: return "sysv_abi"; + case CC_X86RegCall : return "regcall"; case CC_AAPCS: return "aapcs"; case CC_AAPCS_VFP: return "aapcs-vfp"; case CC_IntelOclBicc: return "intel_ocl_bicc"; @@ -3012,6 +3013,7 @@ case AttributedType::attr_fastcall: case AttributedType::attr_stdcall: case AttributedType::attr_thiscall: + case AttributedType::attr_regcall: case AttributedType::attr_pascal: case AttributedType::attr_swiftcall: case AttributedType::attr_vectorcall: @@ -3069,6 +3071,7 @@ case attr_fastcall: case attr_stdcall: case attr_thiscall: + case attr_regcall: case attr_swiftcall: case attr_vectorcall: case attr_pascal: Index: lib/AST/TypePrinter.cpp =================================================================== --- lib/AST/TypePrinter.cpp +++ lib/AST/TypePrinter.cpp @@ -725,6 +725,9 @@ case CC_X86_64SysV: OS << " __attribute__((sysv_abi))"; break; + case CC_X86RegCall: + OS << " __attribute__((regcall))"; + break; case CC_SpirFunction: case CC_OpenCLKernel: // Do nothing. These CCs are not available as attributes. 
@@ -1339,6 +1342,7 @@ case AttributedType::attr_pascal: OS << "pascal"; break; case AttributedType::attr_ms_abi: OS << "ms_abi"; break; case AttributedType::attr_sysv_abi: OS << "sysv_abi"; break; + case AttributedType::attr_regcall: OS << "regcall"; break; case AttributedType::attr_pcs: case AttributedType::attr_pcs_vfp: { OS << "pcs("; Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -2869,6 +2869,7 @@ case CC_X86FastCall: case CC_X86StdCall: case CC_X86VectorCall: + case CC_X86RegCall: case CC_C: case CC_Swift: case CC_X86Pascal: @@ -4452,6 +4453,7 @@ case CC_X86_64Win64: case CC_PreserveMost: case CC_PreserveAll: + case CC_X86RegCall: return CCCR_OK; default: return CCCR_Warning; @@ -4519,6 +4521,7 @@ case CC_X86VectorCall: case CC_IntelOclBicc: case CC_X86_64SysV: + case CC_X86RegCall: return CCCR_OK; default: return CCCR_Warning; Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -48,6 +48,7 @@ default: return llvm::CallingConv::C; case CC_X86StdCall: return llvm::CallingConv::X86_StdCall; case CC_X86FastCall: return llvm::CallingConv::X86_FastCall; + case CC_X86RegCall: return llvm::CallingConv::X86_RegCall; case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall; case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64; case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV; @@ -173,6 +174,9 @@ if (D->hasAttr()) return CC_X86FastCall; + if (D->hasAttr()) + return CC_X86RegCall; + if (D->hasAttr()) return CC_X86ThisCall; Index: lib/CodeGen/CGDebugInfo.cpp =================================================================== --- lib/CodeGen/CGDebugInfo.cpp +++ lib/CodeGen/CGDebugInfo.cpp @@ -893,6 +893,7 @@ case CC_Swift: case CC_PreserveMost: case CC_PreserveAll: + case CC_X86RegCall: return 0; } return 0; Index: lib/CodeGen/CodeGenModule.cpp 
=================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -683,7 +683,15 @@ } else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); - Str = II->getName(); + const FunctionDecl *FD = dyn_cast(ND); + + if (FD && FD->getType()->castAs()->getCallConv() == CC_X86RegCall) { + llvm::raw_svector_ostream Out(Buffer); + Out << "__regcall3__" << II->getName(); + Str = Out.str(); + } else { + Str = II->getName(); + } } // Keep the first result in the case of a mangling collision. Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -1229,7 +1229,8 @@ const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(RetTy, Base, NumElts)) { // The LLVM struct type for such an aggregate should lower properly. return ABIArgInfo::getDirect(); @@ -1443,7 +1444,8 @@ return true; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs) NeedsPadding = true; @@ -1461,7 +1463,8 @@ return false; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) > 32) return false; @@ -1494,7 +1497,8 @@ // to other targets. 
const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; @@ -1540,7 +1544,8 @@ (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall, + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall, PaddingType); return getIndirectResult(Ty, true, State); @@ -1591,7 +1596,10 @@ State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) State.FreeRegs = FI.getRegParm(); - else + else if (State.CC == llvm::CallingConv::X86_RegCall) { + State.FreeRegs = 5; + State.FreeSSERegs = 8; + } else State.FreeRegs = DefaultNumRegisterParameters; if (!getCXXABI().classifyReturnType(FI)) { @@ -1937,7 +1945,15 @@ unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const; + + ABIArgInfo classifyRegCallStructType(QualType Ty, + unsigned &neededInt, + unsigned &neededSSE) const; + ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, + unsigned &neededInt, + unsigned &neededSSE) const; + bool IsIllegalVectorType(QualType Ty) const; /// The 0.98 ABI revision clarified a lot of ambiguities, @@ -3277,14 +3293,81 @@ return ABIArgInfo::getDirect(ResType); } +ABIArgInfo X86_64ABIInfo::classifyRegCallStructTypeImpl( + QualType Ty, unsigned &neededInt, unsigned &neededSSE) const { + auto RT = Ty->getAs(); + assert (RT && "classifyRegCallStructType only valid with struct types"); + + if (RT->getDecl()->hasFlexibleArrayMember()) { + return getIndirectReturnResult(Ty); + } + + // Sum up bases + if (auto CXXRD = dyn_cast(RT->getDecl())) + for (const auto &I : CXXRD->bases()) + if (classifyRegCallStructTypeImpl(I.getType(), neededInt, 
neededSSE).isIndirect()) { + neededInt = neededSSE = 0; + return getIndirectReturnResult(Ty); + } + + + // Sum up members + for (const auto *FD : RT->getDecl()->fields()) { + if (FD->getType()->isStructureType()) { + if (classifyRegCallStructTypeImpl(FD->getType(), neededInt, neededSSE).isIndirect()) { + neededInt = neededSSE = 0; + return getIndirectReturnResult(Ty); + } + } else { + unsigned localNeededInt, localNeededSSE; + if (classifyArgumentType(FD->getType(), (std::numeric_limits::max)(), + localNeededInt, localNeededSSE, true).isIndirect()) { + neededInt = neededSSE = 0; + return getIndirectReturnResult(Ty); + } + else { + neededInt += localNeededInt; + neededSSE += localNeededSSE; + } + } + } + + return ABIArgInfo::getDirect(); +} + +ABIArgInfo X86_64ABIInfo::classifyRegCallStructType( + QualType Ty, unsigned &neededInt, unsigned &neededSSE) const { + + neededInt = 0; + neededSSE = 0; + + return classifyRegCallStructTypeImpl(Ty, neededInt, neededSSE); +} + void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + bool IsRegCall = + FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; // Keep track of the number of assigned registers. - unsigned freeIntRegs = 6, freeSSERegs = 8; + unsigned freeIntRegs = IsRegCall ? 11 : 6; + unsigned freeSSERegs = IsRegCall ? 
16 : 8; + unsigned neededInt, neededSSE; + if (IsRegCall && FI.getReturnType()->isStructureType()) { + FI.getReturnInfo() = classifyRegCallStructType(FI.getReturnType(), + neededInt, + neededSSE); + if (freeIntRegs >= neededInt && freeSSERegs >= neededSSE) { + freeIntRegs -= neededInt; + freeSSERegs -= neededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } + else if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) @@ -3302,8 +3385,12 @@ it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; - unsigned neededInt, neededSSE; - it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, + if (IsRegCall && it->type->isStructureType()) + { + it->info = classifyRegCallStructType(it->type, neededInt, neededSSE); + } + else + it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, neededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any @@ -3637,14 +3724,28 @@ void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; + bool IsRegCall = + FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; - // We can use up to 4 SSE return registers with vectorcall. - unsigned FreeSSERegs = IsVectorCall ? 4 : 0; + unsigned FreeSSERegs = 0; + if (IsVectorCall) { + // We can use up to 4 SSE return registers with vectorcall. + FreeSSERegs = 4; + } else if (IsRegCall) { + // RegCall gives us 16 SSE registers total, return or otherwise. + FreeSSERegs = 16; + } + if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); - // We can use up to 6 SSE register parameters with vectorcall. - FreeSSERegs = IsVectorCall ? 
6 : 0; + // Regcall doesn't differentiate between return and parameter registers, + // and non Reg/Vector call was 0 anyway. + if (IsVectorCall) { + // We can use up to 6 SSE register parameters with vectorcall. + FreeSSERegs = 6; + } + for (auto &I : FI.arguments()) I.info = classify(I.type, FreeSSERegs, false); } Index: lib/Parse/ParseDecl.cpp =================================================================== --- lib/Parse/ParseDecl.cpp +++ lib/Parse/ParseDecl.cpp @@ -605,6 +605,7 @@ case tok::kw___fastcall: case tok::kw___stdcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___cdecl: case tok::kw___vectorcall: case tok::kw___ptr64: @@ -3137,6 +3138,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: ParseMicrosoftTypeAttributes(DS.getAttributes()); continue; @@ -4454,6 +4456,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___w64: case tok::kw___ptr64: @@ -4638,6 +4641,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___w64: case tok::kw___sptr: @@ -4876,6 +4880,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: if (AttrReqs & AR_DeclspecAttributesParsed) { ParseMicrosoftTypeAttributes(DS.getAttributes()); Index: lib/Parse/ParseTentative.cpp =================================================================== --- lib/Parse/ParseTentative.cpp +++ lib/Parse/ParseTentative.cpp @@ -909,7 +909,7 @@ // '(' abstract-declarator ')' if (Tok.isOneOf(tok::kw___attribute, tok::kw___declspec, tok::kw___cdecl, tok::kw___stdcall, tok::kw___fastcall, tok::kw___thiscall, - tok::kw___vectorcall)) + tok::kw___regcall, tok::kw___vectorcall)) return TPResult::True; // attributes indicate declaration TPResult TPR = 
TryParseDeclarator(mayBeAbstract, mayHaveIdentifier); if (TPR != TPResult::Ambiguous) @@ -1058,6 +1058,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___unaligned: case tok::kw___vector: @@ -1351,6 +1352,7 @@ case tok::kw___stdcall: case tok::kw___fastcall: case tok::kw___thiscall: + case tok::kw___regcall: case tok::kw___vectorcall: case tok::kw___w64: case tok::kw___sptr: Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -8285,7 +8285,7 @@ // Windows system headers sometimes accidentally use stdcall without // (void) parameters, so we relax this to a warning. int DiagID = - CC == CC_X86StdCall ? diag::warn_cconv_knr : diag::err_cconv_knr; + (CC == CC_X86StdCall || CC == CC_X86RegCall) ? diag::warn_cconv_knr : diag::err_cconv_knr; Diag(NewFD->getLocation(), DiagID) << FunctionType::getNameForCallConv(CC); } Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -3830,6 +3830,11 @@ SysVABIAttr(Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex())); return; + case AttributeList::AT_RegCall: + D->addAttr(::new (S.Context) + RegCallAttr(Attr.getRange(), S.Context, + Attr.getAttributeSpellingListIndex())); + return; case AttributeList::AT_Pcs: { PcsAttr::PCSType PCS; switch (CC) { @@ -3891,6 +3896,7 @@ case AttributeList::AT_Pascal: CC = CC_X86Pascal; break; case AttributeList::AT_SwiftCall: CC = CC_Swift; break; case AttributeList::AT_VectorCall: CC = CC_X86VectorCall; break; + case AttributeList::AT_RegCall: CC = CC_X86RegCall; break; case AttributeList::AT_MSABI: CC = Context.getTargetInfo().getTriple().isOSWindows() ? 
CC_C : CC_X86_64Win64; @@ -5846,6 +5852,7 @@ case AttributeList::AT_FastCall: case AttributeList::AT_ThisCall: case AttributeList::AT_Pascal: + case AttributeList::AT_RegCall: case AttributeList::AT_SwiftCall: case AttributeList::AT_VectorCall: case AttributeList::AT_MSABI: Index: lib/Sema/SemaType.cpp =================================================================== --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -106,6 +106,7 @@ case AttributeList::AT_FastCall: \ case AttributeList::AT_StdCall: \ case AttributeList::AT_ThisCall: \ + case AttributeList::AT_RegCall: \ case AttributeList::AT_Pascal: \ case AttributeList::AT_SwiftCall: \ case AttributeList::AT_VectorCall: \ @@ -4749,6 +4750,8 @@ return AttributeList::AT_StdCall; case AttributedType::attr_thiscall: return AttributeList::AT_ThisCall; + case AttributedType::attr_regcall: + return AttributeList::AT_RegCall; case AttributedType::attr_pascal: return AttributeList::AT_Pascal; case AttributedType::attr_swiftcall: @@ -6087,6 +6090,8 @@ return AttributedType::attr_stdcall; case AttributeList::AT_ThisCall: return AttributedType::attr_thiscall; + case AttributeList::AT_RegCall: + return AttributedType::attr_regcall; case AttributeList::AT_Pascal: return AttributedType::attr_pascal; case AttributeList::AT_SwiftCall: Index: test/CodeGen/regcall.c =================================================================== --- test/CodeGen/regcall.c +++ test/CodeGen/regcall.c @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s --check-prefix=Win32 +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=Win64 +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-linux-gnu | FileCheck %s --check-prefix=Lin32 +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=Lin64 + +void __regcall v1(int a, int b) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v1@8"(i32 inreg %a, i32 inreg %b) +// 
Win64: define x86_regcallcc void @"\01__regcall3__v1@16"(i32 %a, i32 %b) +// Lin32: define x86_regcallcc void @__regcall3__v1(i32 inreg %a, i32 inreg %b) +// Lin64: define x86_regcallcc void @__regcall3__v1(i32 %a, i32 %b) + +void __attribute__((regcall)) v1b(int a, int b) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v1b@8"(i32 inreg %a, i32 inreg %b) +// Win64: define x86_regcallcc void @"\01__regcall3__v1b@16"(i32 %a, i32 %b) +// Lin32: define x86_regcallcc void @__regcall3__v1b(i32 inreg %a, i32 inreg %b) +// Lin64: define x86_regcallcc void @__regcall3__v1b(i32 %a, i32 %b) + +void __regcall v2(char a, char b) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v2@8"(i8 inreg signext %a, i8 inreg signext %b) +// Win64: define x86_regcallcc void @"\01__regcall3__v2@16"(i8 %a, i8 %b) +// Lin32: define x86_regcallcc void @__regcall3__v2(i8 inreg signext %a, i8 inreg signext %b) +// Lin64: define x86_regcallcc void @__regcall3__v2(i8 signext %a, i8 signext %b) + +struct Small { int x; }; +void __regcall v3(int a, struct Small b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v3@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__v3@24"(i32 %a, i32 %b.coerce, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__v3(i32 inreg %a, i32 inreg, i32 %b.0, i32 inreg %c) +// Lin64: define x86_regcallcc void @__regcall3__v3(i32 %a, i32 %b.coerce, i32 %c) + +struct Large { int a[5]; }; +void __regcall v4(int a, struct Large b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__v4@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__v4@40"(i32 %a, %struct.Large* %b, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 %c) +// Lin64: define x86_regcallcc void @__regcall3__v4(i32 %a, [5 x i32] %b.coerce, i32 %c) +// IT: define x86_regcallcc void 
@__regcall3__v4(i32 %a, %struct.Large* byval align 8 %b, i32 %c) + +struct HFA2 { double x, y; }; +struct HFA4 { double w, x, y, z; }; +struct HFA5 { double v, w, x, y, z; }; + +void __regcall hfa1(int a, struct HFA4 b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa1@40"(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa1@48"(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__hfa1(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c) +// Lin64: define x86_regcallcc void @__regcall3__hfa1(i32 %a, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, i32 %c) + +// HFAs that would require more than six total SSE registers are passed +// indirectly. Additional vector arguments can consume the rest of the SSE +// registers. +void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa2@72"(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa2@72"(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double %c) +// Lin32: define x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg) +// Lin64: define x86_regcallcc void @__regcall3__hfa2(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, double %c) + +// Ensure that we pass builtin types directly while counting them against the +// SSE register usage. 
+void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa3@56"(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa3@56"(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) +// Lin32: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) +// Lin64: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.coerce0, double %f.coerce1) + +// Aggregates with more than four elements are not HFAs and are passed byval. +// Because they are not classified as homogeneous, they don't get special +// handling to ensure alignment. +void __regcall hfa4(struct HFA5 a) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hfa4@40"(%struct.HFA5* byval align 4) +// Win64: define x86_regcallcc void @"\01__regcall3__hfa4@40"(%struct.HFA5* %a) +// Lin32: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4 %a) +// Lin64: define x86_regcallcc void @__regcall3__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4) + +// Return HFAs of 4 or fewer elements in registers. 
+static struct HFA2 g_hfa2; +struct HFA2 __regcall hfa5(void) { return g_hfa2; } +// Win32: define x86_regcallcc %struct.HFA2 @"\01__regcall3__hfa5@0"() +// Win64: define x86_regcallcc %struct.HFA2 @"\01__regcall3__hfa5@0"() +// Lin32: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5() +// Lin64: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5() + +typedef float __attribute__((vector_size(16))) v4f32; +struct HVA2 { v4f32 x, y; }; +struct HVA4 { v4f32 w, x, y, z; }; + +void __regcall hva1(int a, struct HVA4 b, int c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hva1@72"(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c) +// Win64: define x86_regcallcc void @"\01__regcall3__hva1@80"(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c) +// Lin32: define x86_regcallcc void @__regcall3__hva1(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c) +// Lin64: define x86_regcallcc void @__regcall3__hva1(i32 %a, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, i32 %c) + +void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hva2@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg) +// Win64: define x86_regcallcc void @"\01__regcall3__hva2@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c) +// Lin32: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg) +// Lin64: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.coerce0, <4 x float> 
%a.coerce1, <4 x float> %a.coerce2, <4 x float> %a.coerce3, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, <4 x float> %c) + +void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {} +// Win32: define x86_regcallcc void @"\01__regcall3__hva3@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) +// Win64: define x86_regcallcc void @"\01__regcall3__hva3@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) +// Lin32: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) +// Lin64: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.coerce0, <4 x float> %f.coerce1) + +typedef float __attribute__((ext_vector_type(3))) v3f32; +struct OddSizeHVA { v3f32 x, y; }; + +void __regcall odd_size_hva(struct OddSizeHVA a) {} +// Win32: define x86_regcallcc void @"\01__regcall3__odd_size_hva@32"(<3 x float> %a.0, <3 x float> %a.1) +// Win64: define x86_regcallcc void @"\01__regcall3__odd_size_hva@32"(<3 x float> %a.0, <3 x float> %a.1) +// Lin32: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1) +// Lin64: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.coerce0, <3 x float> %a.coerce1) + Index: test/CodeGenCXX/regcall.cpp =================================================================== --- test/CodeGenCXX/regcall.cpp +++ test/CodeGenCXX/regcall.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -std=c++11 %s -o - | FileCheck -check-prefix=CHECK-LIN %s +// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 %s -o - | FileCheck -check-prefix=CHECK-WIN %s + +int __regcall foo(int i); + +int 
main() +{ + int p = 0, _data; + auto lambda = [&](int parameter) -> int { + _data = foo(parameter); + return _data; + }; + return lambda(p); +} + +// CHECK-LIN: call x86_regcallcc {{.+}} @_Z15__regcall3__foo +// CHECK-WIN: call x86_regcallcc {{.+}}@{{.+}}__regcall3__foo + +int __regcall foo (int i){ + return i; +} + +// CHECK-LIN: define x86_regcallcc {{.+}}@_Z15__regcall3__foo +// CHECK-WIN: define x86_regcallcc {{.+}}@{{.+}}__regcall3__foo Index: tools/libclang/CXType.cpp =================================================================== --- tools/libclang/CXType.cpp +++ tools/libclang/CXType.cpp @@ -531,6 +531,7 @@ TCALLINGCONV(X86FastCall); TCALLINGCONV(X86ThisCall); TCALLINGCONV(X86Pascal); + TCALLINGCONV(X86RegCall); TCALLINGCONV(X86VectorCall); TCALLINGCONV(X86_64Win64); TCALLINGCONV(X86_64SysV);