Index: include/llvm/IR/CallingConv.h =================================================================== --- include/llvm/IR/CallingConv.h +++ include/llvm/IR/CallingConv.h @@ -196,6 +196,14 @@ /// Register calling convention used for parameters transfer optimization X86_RegCall = 92, + /// C Calling convention for libraries. Checks caller's regparm subtarget + /// feature. + X86_LibCall = 93, + + /// StdCall Calling convention for libraries. Checks caller's regparm + /// subtarget feature. + X86_LibStdCall = 94, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; Index: include/llvm/IR/Module.h =================================================================== --- include/llvm/IR/Module.h +++ include/llvm/IR/Module.h @@ -722,9 +722,18 @@ /// that has "dropped all references", except operator delete. void dropAllReferences(); -/// @} -/// @name Utility functions for querying Debug information. -/// @{ + /// @} + /// @name Utility functions for querying Calling Convention information by + /// checking module flags. + /// @{ + + /// \brief Returns the number of register parameters by checking + /// module flags. + unsigned getNumberRegisterParameters() const; + + /// @} + /// @name Utility functions for querying Debug information. + /// @{ /// \brief Returns the Dwarf Version by checking module flags. 
unsigned getDwarfVersion() const; Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -571,7 +571,9 @@ KEYWORD(ccc); KEYWORD(fastcc); KEYWORD(coldcc); + KEYWORD(x86_libcallcc); KEYWORD(x86_stdcallcc); + KEYWORD(x86_libstdcallcc); KEYWORD(x86_fastcallcc); KEYWORD(x86_thiscallcc); KEYWORD(x86_vectorcallcc); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1690,7 +1690,13 @@ case lltok::kw_ccc: CC = CallingConv::C; break; case lltok::kw_fastcc: CC = CallingConv::Fast; break; case lltok::kw_coldcc: CC = CallingConv::Cold; break; + case lltok::kw_x86_libcallcc: + CC = CallingConv::X86_LibCall; + break; case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; + case lltok::kw_x86_libstdcallcc: + CC = CallingConv::X86_LibStdCall; + break; case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; case lltok::kw_x86_regcallcc: CC = CallingConv::X86_RegCall; break; case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break; Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -124,7 +124,9 @@ kw_fastcc, kw_coldcc, kw_intel_ocl_bicc, + kw_x86_libcallcc, kw_x86_stdcallcc, + kw_x86_libstdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_vectorcallcc, Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -308,7 +308,13 @@ case CallingConv::PreserveAll: Out << "preserve_allcc"; break; case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break; case CallingConv::GHC: Out << "ghccc"; break; + case CallingConv::X86_LibCall: + Out << "x86_libcallcc"; + break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; + 
case CallingConv::X86_LibStdCall: + Out << "x86_libstdcallcc"; + break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break; Index: lib/IR/Mangler.cpp =================================================================== --- lib/IR/Mangler.cpp +++ lib/IR/Mangler.cpp @@ -78,6 +78,7 @@ switch (CC) { case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: + case CallingConv::X86_LibStdCall: case CallingConv::X86_VectorCall: return true; default: Index: lib/IR/Module.cpp =================================================================== --- lib/IR/Module.cpp +++ lib/IR/Module.cpp @@ -465,6 +465,14 @@ GIF.dropAllReferences(); } +unsigned Module::getNumberRegisterParameters() const { + auto *Val = + cast_or_null<ConstantAsMetadata>(getModuleFlag("NumRegisterParameters")); + if (!Val) + return 0; + return cast<ConstantInt>(Val->getValue())->getZExtValue(); +} + unsigned Module::getDwarfVersion() const { auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Dwarf Version")); if (!Val) Index: lib/Target/X86/X86CallingConv.h =================================================================== --- lib/Target/X86/X86CallingConv.h +++ lib/Target/X86/X86CallingConv.h @@ -16,8 +16,13 @@ #define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H #include "MCTargetDesc/X86MCTargetDesc.h" +#include "X86TargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" namespace llvm { @@ -57,18 +62,24 @@ return false; } -inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure - // not to split i64 and 
double between a register and stack - static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; - static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]); - +inline bool CC_X86_32_AssignToReg_MCU(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + // If the argument is InAlloca or ByVal bail. + if (ArgFlags.isInAlloca() || ArgFlags.isByVal()) + return false; + + // Similar to AssignToReg, but do not split multi-reg args + // (i64/double) between a register and stack. + MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; + static const unsigned MaxRegs = sizeof(RegList) / sizeof(RegList[0]); + auto NumRegs = MaxRegs; + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); + unsigned FirstFree = std::min(NumRegs, State.getFirstUnallocated(RegList)); + // If this is the first part of an double/i64/i128, or if we're already // in the middle of a split, add to the pending list. If this is not // the end of the split, return, otherwise go on to process the pending @@ -83,10 +94,83 @@ // If there are no pending members, we are not in the middle of a split, // so do the usual inreg stuff. if (PendingMembers.empty()) { - if (unsigned Reg = State.AllocateReg(RegList)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + if (FirstFree < NumRegs) + if (unsigned Reg = State.AllocateReg(RegList[FirstFree++])) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + return false; + } + + assert(ArgFlags.isSplitEnd()); + + // We now have the entire original argument in PendingMembers, so decide + // whether to use registers or the stack. + // a) To use registers, we need to have enough of them free to contain + // the entire argument. + // b) We never want to use more than 2 registers for a single argument. 
+ + bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree); + + for (auto &It : PendingMembers) { + // If available, always allocate register so subsequent + // arguments cannot use them. + if (UseRegs) + It.convertToReg(State.AllocateReg(RegList[FirstFree++])); + else + It.convertToMem(State.AllocateStack(4, 4)); + State.addLoc(It); + } + + PendingMembers.clear(); + + return true; +} + +inline bool CC_X86_32_AssignToReg_NoSplit(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + + // If the argument is InAlloca or ByVal bail. + if (ArgFlags.isInAlloca() || ArgFlags.isByVal()) + return false; + + // Similar to AssignToReg, but do not split multi-reg args + // (i64/double) between a register and stack. + MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; + static const unsigned MaxRegs = sizeof(RegList) / sizeof(RegList[0]); + + unsigned NumRegs = State.getMachineFunction().getMMI() .getModule() ->getNumberRegisterParameters(); + + assert(NumRegs <= MaxRegs && "More register parameters than registers"); + + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); + + unsigned FirstFree = std::min(NumRegs, State.getFirstUnallocated(RegList)); + + // If this is the first part of an double/i64/i128, or if we're already + // in the middle of a split, add to the pending list. If this is not + // the end of the split, return, otherwise go on to process the pending + // list + if (ArgFlags.isSplit() || !PendingMembers.empty()) { + PendingMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + if (!ArgFlags.isSplitEnd()) return true; - } + } + + // If there are no pending members, we are not in the middle of a split, + // so do the usual inreg stuff. 
+ if (PendingMembers.empty()) { + if (FirstFree < NumRegs) + if (unsigned Reg = State.AllocateReg(RegList[FirstFree++])) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } return false; } @@ -94,17 +178,19 @@ // We now have the entire original argument in PendingMembers, so decide // whether to use registers or the stack. - // Per the MCU ABI: // a) To use registers, we need to have enough of them free to contain // the entire argument. // b) We never want to use more than 2 registers for a single argument. - unsigned FirstFree = State.getFirstUnallocated(RegList); bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree); for (auto &It : PendingMembers) { + // If available, always allocate register so subsequent + // arguments cannot use them. if (UseRegs) It.convertToReg(State.AllocateReg(RegList[FirstFree++])); + else if (FirstFree < MaxRegs) + It.convertToMem(State.AllocateStack(4, 4, RegList[FirstFree++])); else It.convertToMem(State.AllocateStack(4, 4)); State.addLoc(It); @@ -118,4 +204,3 @@ } // End llvm namespace #endif - Index: lib/Target/X86/X86CallingConv.td =================================================================== --- lib/Target/X86/X86CallingConv.td +++ lib/Target/X86/X86CallingConv.td @@ -810,19 +810,28 @@ CCDelegateTo<CC_X86_32_Common> ]>; -def CC_X86_32_MCU : CallingConv<[ - // Handles byval parameters. Note that, like FastCC, we can't rely on - // the delegation to CC_X86_32_Common because that happens after code that - // puts arguments in registers. - CCIfByVal<CCPassByVal<4, 4>>, +def CC_X86_32_LibCall : CallingConv<[ + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + + // The 'nest' parameter, if any, is passed in ECX. + CCIfNest<CCAssignToReg<[ECX]>>, + // Assign to Reg if RegParm flag + CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_AssignToReg_NoSplit">>>, + + // Otherwise, same as everything else. 
+ CCDelegateTo<CC_X86_32_Common> +]>; + +def CC_X86_32_MCU : CallingConv<[ // Promote i1/i8/i16 arguments to i32. CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, // If the call is not a vararg call, some arguments may be passed // in integer registers. - CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_MCUInReg">>>, - + CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_AssignToReg_MCU">>>, + // Otherwise, same as everything else. CCDelegateTo<CC_X86_32_Common> ]>; @@ -984,6 +993,10 @@ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>, + // Library Call CCs + CCIfCC<"CallingConv::X86_LibCall", CCDelegateTo<CC_X86_32_LibCall>>, + CCIfCC<"CallingConv::X86_LibStdCall", CCDelegateTo<CC_X86_32_LibCall>>, + // Otherwise, drop to normal X86-32 CC CCDelegateTo<CC_X86_32_C> ]>; Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -1195,12 +1195,10 @@ return false; CallingConv::ID CC = F.getCallingConv(); - if (CC != CallingConv::C && - CC != CallingConv::Fast && - CC != CallingConv::X86_FastCall && - CC != CallingConv::X86_StdCall && - CC != CallingConv::X86_ThisCall && - CC != CallingConv::X86_64_SysV && + if (CC != CallingConv::C && CC != CallingConv::X86_LibCall && + CC != CallingConv::Fast && CC != CallingConv::X86_FastCall && + CC != CallingConv::X86_StdCall && CC != CallingConv::X86_LibStdCall && + CC != CallingConv::X86_ThisCall && CC != CallingConv::X86_64_SysV && CC != CallingConv::X86_64_Win64) return false; @@ -3131,11 +3129,13 @@ switch (CC) { default: return false; case CallingConv::C: + case CallingConv::X86_LibCall: case CallingConv::Fast: case CallingConv::WebKit_JS: case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: + case CallingConv::X86_LibStdCall: case CallingConv::X86_ThisCall: case CallingConv::X86_64_Win64: case 
CallingConv::X86_64_SysV: Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -105,6 +105,10 @@ addBypassSlowDiv(64, 32); } + // Set all builtin calling conventions to X86_LibCall. + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) + setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::X86_LibCall); + if (Subtarget.isTargetKnownWindowsMSVC() || Subtarget.isTargetWindowsItanium()) { // Setup Windows compiler runtime calls. @@ -113,11 +117,11 @@ setLibcallName(RTLIB::SREM_I64, "_allrem"); setLibcallName(RTLIB::UREM_I64, "_aullrem"); setLibcallName(RTLIB::MUL_I64, "_allmul"); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_LibStdCall); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_LibStdCall); + setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_LibStdCall); + setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_LibStdCall); + setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_LibStdCall); } if (Subtarget.isTargetDarwin()) { @@ -2624,11 +2628,13 @@ switch (CC) { // C calling conventions: case CallingConv::C: + case CallingConv::X86_LibCall: case CallingConv::X86_64_Win64: case CallingConv::X86_64_SysV: // Callee pop conventions: case CallingConv::X86_ThisCall: case CallingConv::X86_StdCall: + case CallingConv::X86_LibStdCall: case CallingConv::X86_VectorCall: case CallingConv::X86_FastCall: return true; @@ -4198,6 +4204,7 @@ default: return false; case CallingConv::X86_StdCall: + case CallingConv::X86_LibStdCall: case CallingConv::X86_FastCall: case 
CallingConv::X86_ThisCall: case CallingConv::X86_VectorCall: @@ -20234,7 +20241,7 @@ } // FIXME? Maybe this could be a TableGen attribute on some registers and -// this table could be generated automatically from RegInfo. +// this table could be generated automatically from RegInfo. unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); @@ -20428,7 +20435,9 @@ default: llvm_unreachable("Unsupported calling convention"); case CallingConv::C: - case CallingConv::X86_StdCall: { + case CallingConv::X86_LibCall: + case CallingConv::X86_StdCall: + case CallingConv::X86_LibStdCall: { // Pass 'nest' parameter in ECX. // Must be kept in sync with X86CallingConv.td NestReg = X86::ECX; Index: lib/Target/X86/X86Subtarget.h =================================================================== --- include/llvm/IR/Module.h +++ include/llvm/IR/Module.h @@ -563,9 +563,11 @@ switch (CC) { // On Win64, all these conventions just use the default convention. 
case CallingConv::C: + case CallingConv::X86_LibCall: case CallingConv::Fast: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: + case CallingConv::X86_LibStdCall: case CallingConv::X86_ThisCall: case CallingConv::X86_VectorCall: case CallingConv::Intel_OCL_BI: Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -359,4 +359,3 @@ bool X86Subtarget::enableEarlyIfConversion() const { return hasCMov() && X86EarlyIfConv; } - Index: lib/Target/X86/X86WinEHState.cpp =================================================================== --- lib/Target/X86/X86WinEHState.cpp +++ lib/Target/X86/X86WinEHState.cpp @@ -303,7 +303,7 @@ "__CxxLongjmpUnwind", FunctionType::get(VoidTy, Int8PtrType, /*isVarArg=*/false)); cast<Function>(CxxLongjmpUnwind->stripPointerCasts()) - ->setCallingConv(CallingConv::X86_StdCall); + ->setCallingConv(CallingConv::X86_LibStdCall); } else if (Personality == EHPersonality::MSVC_X86SEH) { // If _except_handler4 is in use, some additional guard checks and prologue // stuff is required. 
@@ -356,7 +356,7 @@ FunctionType::get(Type::getVoidTy(TheModule->getContext()), Int8PtrType, /*isVarArg=*/false)); cast<Function>(SehLongjmpUnwind->stripPointerCasts()) - ->setCallingConv(CallingConv::X86_StdCall); + ->setCallingConv(CallingConv::X86_LibStdCall); } else { llvm_unreachable("unexpected personality function"); } Index: test/CodeGen/X86/dwarf-eh-prepare.ll =================================================================== --- test/CodeGen/X86/dwarf-eh-prepare.ll +++ test/CodeGen/X86/dwarf-eh-prepare.ll @@ -50,7 +50,7 @@ resume { i8*, i32 } %new_ehvals ; CHECK: eh.resume: -; CHECK-NEXT: call void @_Unwind_Resume(i8* %ehptr) +; CHECK-NEXT: call x86_libcallcc void @_Unwind_Resume(i8* %ehptr) } @@ -88,7 +88,7 @@ ; CHECK: landingpad { i8*, i32 } ; CHECK-NOT: br i1 ; CHECK: ret i32 1 -; CHECK-NOT: call void @_Unwind_Resume +; CHECK-NOT: call x86_libcallcc void @_Unwind_Resume ; CHECK: {{^[}]}} @@ -152,7 +152,7 @@ ; CHECK: catch_int: ; CHECK: ret i32 1 ; CHECK: eh.resume: -; CHECK-NEXT: call void @_Unwind_Resume(i8* %ehptr) +; CHECK-NEXT: call x86_libcallcc void @_Unwind_Resume(i8* %ehptr) declare i32 @__gxx_personality_v0(...) 
declare i32 @llvm.eh.typeid.for(i8*) Index: test/CodeGen/X86/regparm.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/regparm.ll @@ -0,0 +1,31 @@ +; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #1 + +define void @use_memset(i8* inreg nocapture %dest, i32 inreg %c, i32 inreg %n) local_unnamed_addr #0 { +entry: +;CHECK-LABEL: @use_memset +;CHECK-NOT: push +;CHECK: jmp memset +;CHECK-NOT: retl + %0 = trunc i32 %c to i8 + tail call void @llvm.memset.p0i8.i32(i8* %dest, i8 %0, i32 %n, i32 1, i1 false) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #1 + + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"NumRegisterParameters", i32 3} +!1 = !{!"clang version 4.0.0 (trunk 288025) (llvm/trunk 288033)"}