Index: include/llvm/CodeGen/CommandFlags.h =================================================================== --- include/llvm/CodeGen/CommandFlags.h +++ include/llvm/CodeGen/CommandFlags.h @@ -45,6 +45,11 @@ cl::desc("Target specific attributes (-mattr=help for details)"), cl::value_desc("a1,+a2,-a3,...")); +cl::opt<unsigned> RegParm( + "regparm", cl::desc("set number of register parameters (X86 only)"), + cl::value_desc("[0-3]"), + cl::init(0)); + cl::opt<Reloc::Model> RelocModel( "relocation-model", cl::desc("Choose relocation model"), cl::values( @@ -283,6 +288,7 @@ EnableHonorSignDependentRoundingFPMath; if (FloatABIForCalls != FloatABI::Default) Options.FloatABIType = FloatABIForCalls; + Options.RegParm = RegParm; Options.NoZerosInBSS = DontPlaceZerosInBSS; Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; Options.StackAlignmentOverride = OverrideStackAlignment; Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -108,8 +108,8 @@ DisableIntegratedAS(false), CompressDebugSections(false), RelaxELFRelocations(false), FunctionSections(false), DataSections(false), UniqueSectionNames(true), TrapUnreachable(false), - EmulatedTLS(false), EnableIPRA(false), - FloatABIType(FloatABI::Default), + EmulatedTLS(false), EnableIPRA(false), RegParm(0), + FloatABIType(FloatABI::Default), AllowFPOpFusion(FPOpFusion::Standard), ThreadModel(ThreadModel::POSIX), EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default), @@ -225,6 +225,9 @@ /// This flag enables InterProcedural Register Allocation (IPRA). unsigned EnableIPRA : 1; + /// RegParm - Number of register parameters, set by -regparm (X86 only). + unsigned RegParm; + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied /// on the command line. This setting may either be Default, Soft, or Hard. /// Default selects the target's default behavior. 
Soft selects the ABI for Index: lib/Target/X86/X86CallingConv.h =================================================================== --- lib/Target/X86/X86CallingConv.h +++ lib/Target/X86/X86CallingConv.h @@ -18,6 +18,9 @@ #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/IR/CallingConv.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "X86TargetMachine.h" namespace llvm { @@ -50,18 +53,30 @@ " doesn't support long double and mask types yet."); } -inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, +inline bool CC_X86_32_AssignToReg_NoSplit(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure - // not to split i64 and double between a register and stack - static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; - static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]); - + // If the argument is InAlloca or ByVal, bail. + if (ArgFlags.isInAlloca() || ArgFlags.isByVal()) + return false; + + // Similar to AssignToReg, but do not split multi-reg args + // (i64/double) between a register and stack. + MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; + static const unsigned MaxRegs = sizeof(RegList)/sizeof(RegList[0]); + + auto NumRegs = State.getMachineFunction().getTarget().Options.RegParm; + if (static_cast<const X86Subtarget &>(State.getMachineFunction().getSubtarget()).isTargetMCU()) + NumRegs = MaxRegs; + + assert(NumRegs <= MaxRegs && "More register parameters than registers"); + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); + unsigned FirstFree = std::min(NumRegs, State.getFirstUnallocated(RegList)); + // If this is the first part of an double/i64/i128, or if we're already // in the middle of a split, add to the pending list. 
If this is not // the end of the split, return, otherwise go on to process the pending @@ -76,10 +91,11 @@ // If there are no pending members, we are not in the middle of a split, // so do the usual inreg stuff. if (PendingMembers.empty()) { - if (unsigned Reg = State.AllocateReg(RegList)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return true; - } + if (FirstFree < NumRegs) + if (unsigned Reg = State.AllocateReg(RegList[FirstFree++])) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } return false; } @@ -87,17 +103,19 @@ // We now have the entire original argument in PendingMembers, so decide // whether to use registers or the stack. - // Per the MCU ABI: // a) To use registers, we need to have enough of them free to contain // the entire argument. // b) We never want to use more than 2 registers for a single argument. - unsigned FirstFree = State.getFirstUnallocated(RegList); bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree); for (auto &It : PendingMembers) { + // If available, always allocate register so subsequent + // arguments cannot use them. if (UseRegs) It.convertToReg(State.AllocateReg(RegList[FirstFree++])); + else if (FirstFree < MaxRegs) + It.convertToMem(State.AllocateStack(4, 4, RegList[FirstFree++])); else It.convertToMem(State.AllocateStack(4, 4)); State.addLoc(It); @@ -111,4 +129,3 @@ } // End llvm namespace #endif - Index: lib/Target/X86/X86CallingConv.td =================================================================== --- lib/Target/X86/X86CallingConv.td +++ lib/Target/X86/X86CallingConv.td @@ -796,24 +796,22 @@ // The first 3 integer arguments, if marked 'inreg' and if the call is not // a vararg call, are passed in integer registers. CCIfNotVarArg>>>, + // Assign to Reg if RegParm flag + CCIfNotVarArg>>, // Otherwise, same as everything else. CCDelegateTo ]>; -def CC_X86_32_MCU : CallingConv<[ - // Handles byval parameters. 
Note that, like FastCC, we can't rely on - // the delegation to CC_X86_32_Common because that happens after code that - // puts arguments in registers. - CCIfByVal>, +def CC_X86_32_MCU : CallingConv<[ // Promote i1/i8/i16 arguments to i32. CCIfType<[i1, i8, i16], CCPromoteToType>, // If the call is not a vararg call, some arguments may be passed // in integer registers. - CCIfNotVarArg>>, - + CCIfNotVarArg>>, + // Otherwise, same as everything else. CCDelegateTo ]>; @@ -990,7 +988,6 @@ CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, - // Otherwise, drop to normal X86-32 CC CCDelegateTo ]>; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1,3 +1,4 @@ + //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure @@ -104,6 +105,11 @@ addBypassSlowDiv(64, 16); } + // Set all builtin calling conventions to BuiltinCC. + auto BuiltinCC = CallingConv::C; + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) + setLibcallCallingConv((RTLIB::Libcall)i, BuiltinCC); + if (Subtarget.isTargetKnownWindowsMSVC() || Subtarget.isTargetWindowsItanium()) { // Setup Windows compiler runtime calls. 
Index: test/CodeGen/X86/pr18415.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/pr18415.ll @@ -0,0 +1,52 @@ +; RUN: llc %s -mtriple i386-unknown-linux-gnu -regparm 0 -o - | FileCheck %s -check-prefix CHECK0 +; RUN: llc %s -mtriple i386-unknown-linux-gnu -regparm 3 -o - | FileCheck %s -check-prefix CHECK3 + +; ModuleID = '/usr/local/google/home/niravd/pr18415.c' +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @use_foo(i8* inreg %dest, i8* inreg %src, i32 inreg %n) #0 { + +; CHECK0-LABEL: @use_foo +; CHECK0-NOT: pushl +; CHECK0: jmp foo +; CHECK0-NOT: retl +; CHECK3-LABEL: @use_foo +; CHECK3-NOT: pushl +; CHECK3: jmp foo +; CHECK3-NOT: retl +%1 = tail call i8* @foo(i8* %dest, i8* %src, i32 %n) #4 + ret void +} + +declare i8* @foo(i8* inreg, i8* inreg, i32 inreg) #1 + +; Function Attrs: norecurse nounwind +define void @use_memcpy(i8* inreg nocapture %dest, i8* inreg nocapture readonly %src, i32 inreg %n) #2 { +; CHECK0-LABEL: @use_memcpy +; CHECK0: pushl %ecx +; CHECK0: pushl %edx +; CHECK0: pushl %eax +; CHECK0: calll memcpy +; CHECK0: retl +; CHECK3-LABEL: @use_memcpy +; CHECK3-NOT: pushl +; CHECK3: jmp memcpy +; CHECK3-NOT: retl +tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %n, i32 1, i1 false) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #3 + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "regparm"="2"} +attributes #3 = { argmemonly nounwind} +attributes #4 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.8.0-2ubuntu3~trusty4 (tags/RELEASE_380/final)"} Index: test/CodeGen/X86/regparm.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/regparm.ll @@ -0,0 +1,104 @@ +; RUN: llc %s -mtriple i386-unknown-linux-gnu -regparm 0 -o - | FileCheck %s -check-prefix CHECK0 +; RUN: llc %s -mtriple i386-unknown-linux-gnu -regparm 1 -o - | FileCheck %s -check-prefix CHECK1 +; RUN: llc %s -mtriple i386-unknown-linux-gnu -regparm 2 -o - | FileCheck %s -check-prefix CHECK2 +; RUN: llc %s -mtriple i386-unknown-linux-gnu -regparm 3 -o - | FileCheck %s -check-prefix CHECK3 +target triple = "i386-unknown-linux-gnu" + +;CHECK0-LABEL: @test0 +;CHECK0: movl 4(%esp), %eax +;CHECK0-NEXT: addl 8(%esp), %eax +;CHECK0-NEXT: addl 12(%esp), %eax +;CHECK0-NEXT: retl + +;CHECK1-LABEL: @test0 +;CHECK1: addl 4(%esp), %eax +;CHECK1-NEXT: addl 8(%esp), %eax +;CHECK1-NEXT: retl + +;CHECK2-LABEL: @test0 +;CHECK2: addl %edx, %eax +;CHECK2-NEXT: addl 4(%esp), %eax +;CHECK2-NEXT: retl + +;CHECK3-LABEL: @test0 +;CHECK3: addl %edx, %eax +;CHECK3-NEXT: addl %ecx, %eax +;CHECK3-NEXT: retl + +define i32 @test0(i32 %a, i32 %b, i32 %c, i32 %d) { + %1 = add i32 %a, %b + %2 = add i32 %1, %c + %3 = add i32 %2, %d + ret i32 %2 +} + +; i64 requires 2 registers. 
If it does not fit, the 1 register is still allocated. + +;CHECK1-LABEL: @test1 +;CHECK1: movl 4(%esp), %eax +;CHECK1-NEXT: addl 12(%esp), %eax +;CHECK1-NEXT: addl 8(%esp), %eax +;CHECK1-NEXT: addl 16(%esp), %eax +;CHECK1-NEXT: addl 20(%esp), %eax +;CHECK1-NEXT: retl + +;CHECK2-LABEL: @test1 +;CHECK2: addl 4(%esp), %eax +;CHECK2-NEXT: addl %edx, %eax +;CHECK2-NEXT: addl 8(%esp), %eax +;CHECK2-NEXT: addl 12(%esp), %eax +;CHECK2-NEXT: retl + +;CHECK3-LABEL: @test1 +;CHECK3: addl %ecx, %eax +;CHECK3-NEXT: addl %edx, %eax +;CHECK3-NEXT: addl 4(%esp), %eax +;CHECK3-NEXT: addl 8(%esp), %eax +;CHECK3-NEXT: retl + +define i32 @test1(i64 %a, i32 %b, i32 %c, i32 %d) { + %shr = lshr i64 %a, 32 + %conv = trunc i64 %shr to i32 + %conv1 = trunc i64 %a to i32 + %add = add i32 %conv1, %b + %add2 = add i32 %add, %conv + %add3 = add i32 %add2, %c + %add4 = add i32 %add3, %d + ret i32 %add4 +} + +;CHECK1-LABEL: @test2 +;CHECK1: addl 4(%esp), %eax +;CHECK1-NEXT: addl 8(%esp), %eax +;CHECK1-NEXT: addl 12(%esp), %eax +;CHECK1-NEXT: addl 16(%esp), %eax +;CHECK1-NEXT: retl +;CHECK2-LABEL: @test2 +;CHECK2: addl 4(%esp), %eax +;CHECK2-NEXT: addl 8(%esp), %eax +;CHECK2-NEXT: addl 12(%esp), %eax +;CHECK2-NEXT: addl 16(%esp), %eax +;CHECK2-NEXT: retl +;CHECK3-LABEL: @test2 +;CHECK3: addl %edx, %eax +;CHECK3-NEXT: addl %ecx, %eax +;CHECK3-NEXT: addl 4(%esp), %eax +;CHECK3-NEXT: addl 8(%esp), %eax +;CHECK3-NEXT: retl + +define i32 @test2(i32 %b, i64 %a, i32 %c, i32 %d) { + %shr = lshr i64 %a, 32 + %conv = trunc i64 %shr to i32 + %conv1 = trunc i64 %a to i32 + %add = add i32 %conv1, %b + %add2 = add i32 %add, %conv + %add3 = add i32 %add2, %c + %add4 = add i32 %add3, %d + ret i32 %add4 +} + + + + + +