Skip to content

Commit

Permalink
[X86] Better support for the MCU psABI (LLVM part)
Browse files Browse the repository at this point in the history
This adds support for the MCU psABI in a way different from r251223 and r251224,
basically reverting most of these two patches. The problem with the approach
taken in r251223/4 is that it only handled libcalls that originated from the backend.
However, the mid-end also inserts quite a few libcalls and assumes these use the
platform's default calling convention.

The previous patch tried to insert inregs when necessary both in the FE and,
somewhat hackily, in the CG. Instead, we now define a new default calling convention
for the MCU, which doesn't use inreg marking at all, similarly to what x86-64 does.

Differential Revision: http://reviews.llvm.org/D15054

llvm-svn: 256494
  • Loading branch information
Michael Kuperstein committed Dec 28, 2015

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 175a7cb commit 2ea81ba
Showing 10 changed files with 204 additions and 52 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/Target/TargetCallingConv.h
Original file line number Diff line number Diff line change
@@ -46,6 +46,8 @@ namespace ISD {
static const uint64_t SplitOffs = 11;
static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca
static const uint64_t InAllocaOffs = 12;
static const uint64_t SplitEnd = 1ULL<<13; ///< Last part of a split
static const uint64_t SplitEndOffs = 13;
static const uint64_t OrigAlign = 0x1FULL<<27;
static const uint64_t OrigAlignOffs = 27;
static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size
@@ -103,6 +105,9 @@ namespace ISD {
/// Reports whether the Split bit is set in Flags.
bool isSplit() const { return (Flags & Split) != 0; }
/// Turns on the flag bit located at position SplitOffs.
void setSplit() { Flags = Flags | (One << SplitOffs); }

/// Reports whether the SplitEnd bit (last part of a split) is set in Flags.
bool isSplitEnd() const { return (Flags & SplitEnd) != 0; }
/// Turns on the flag bit located at position SplitEndOffs.
void setSplitEnd() { Flags = Flags | (One << SplitEndOffs); }

unsigned getOrigAlign() const {
return (unsigned)
((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2);
7 changes: 0 additions & 7 deletions llvm/include/llvm/Target/TargetLowering.h
Original file line number Diff line number Diff line change
@@ -2453,13 +2453,6 @@ class TargetLowering : public TargetLoweringBase {

};

// Overridable hook for marking library-call arguments as inreg; for normal
// calls that marking is done by the frontend ABI code. This default
// implementation leaves Args untouched.
virtual void markInRegArguments(SelectionDAG &DAG,
                                TargetLowering::ArgListTy &Args) const {}

/// This function lowers an abstract call to a function into an actual call.
/// This returns a pair of operands. The first element is the return value
/// for the function (if RetTy is not VoidTy). The second element is the
10 changes: 8 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
@@ -7145,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0)
else if (j != 0) {
MyFlags.Flags.setOrigAlign(1);
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}

CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
@@ -7390,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// If it isn't the first piece, alignment must be 1.
else if (i > 0)
else if (i > 0) {
MyFlags.Flags.setOrigAlign(1);
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
2 changes: 0 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
@@ -101,8 +101,6 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
Args.push_back(Entry);
}

markInRegArguments(DAG, Args);

if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
59 changes: 59 additions & 0 deletions llvm/lib/Target/X86/X86CallingConv.h
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
#define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H

#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"

@@ -42,6 +43,64 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}

inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
// This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
// not to split i64 and double between a register and stack
static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);

SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

// If this is the first part of an double/i64/i128, or if we're already
// in the middle of a split, add to the pending list. If this is not
// the end of the split, return, otherwise go on to process the pending
// list
if (ArgFlags.isSplit() || !PendingMembers.empty()) {
PendingMembers.push_back(
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
if (!ArgFlags.isSplitEnd())
return true;
}

// If there are no pending members, we are not in the middle of a split,
// so do the usual inreg stuff.
if (PendingMembers.empty()) {
if (unsigned Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return true;
}
return false;
}

assert(ArgFlags.isSplitEnd());

// We now have the entire original argument in PendingMembers, so decide
// whether to use registers or the stack.
// Per the MCU ABI:
// a) To use registers, we need to have enough of them free to contain
// the entire argument.
// b) We never want to use more than 2 registers for a single argument.

unsigned FirstFree = State.getFirstUnallocated(RegList);
bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);

for (auto &It : PendingMembers) {
if (UseRegs)
It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
else
It.convertToMem(State.AllocateStack(4, 4));
State.addLoc(It);
}

PendingMembers.clear();

return true;
}

} // End llvm namespace

#endif
18 changes: 18 additions & 0 deletions llvm/lib/Target/X86/X86CallingConv.td
Original file line number Diff line number Diff line change
@@ -592,6 +592,23 @@ def CC_X86_32_C : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;

// Argument convention that CC_X86_32 delegates to when the subtarget reports
// isTargetMCU(). Rules are tried in the order listed: byval handling, small
// integer promotion, custom register assignment for i32 values of non-vararg
// calls, and finally the common X86-32 rules.
def CC_X86_32_MCU : CallingConv<[
// Handles byval parameters. Note that, like FastCC, we can't rely on
// the delegation to CC_X86_32_Common because that happens after code that
// puts arguments in registers.
CCIfByVal<CCPassByVal<4, 4>>,

// Promote i1/i8/i16 arguments to i32.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,

// If the call is not a vararg call, some arguments may be passed
// in integer registers.
CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_MCUInReg">>>,

// Otherwise, same as everything else.
CCDelegateTo<CC_X86_32_Common>
]>;

def CC_X86_32_FastCall : CallingConv<[
// Promote i1/i8/i16 arguments to i32.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -753,6 +770,7 @@ def CC_X86_64_Intr : CallingConv<[

// This is the root argument convention for the X86-32 backend.
def CC_X86_32 : CallingConv<[
CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
13 changes: 6 additions & 7 deletions llvm/lib/Target/X86/X86FastISel.cpp
Original file line number Diff line number Diff line change
@@ -1098,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}

// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
// and into %rax. We also do the same with %eax for Win32.
if (F.hasStructRetAttr() &&
(Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
if (F.hasStructRetAttr()) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -2820,7 +2819,7 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,

if (CS)
if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
CS->paramHasAttr(1, Attribute::InReg))
CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
return 0;

return 4;
36 changes: 6 additions & 30 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
@@ -2447,28 +2447,28 @@ enum StructReturnType {
StackStructReturn
};
static StructReturnType
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
if (Outs.empty())
return NotStructReturn;

const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg())
if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}

/// Determines whether a function uses struct return semantics.
static StructReturnType
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
if (Ins.empty())
return NotStructReturn;

const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg())
if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
@@ -2945,7 +2945,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
argsAreStructReturn(Ins) == StackStructReturn)
argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}

@@ -3065,7 +3065,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs);
StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
@@ -28661,27 +28661,3 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
Attribute::MinSize);
return OptSize && !VT.isVector();
}

/// Flags library-call arguments as inreg on MCU targets.
///
/// The MCU psABI requires some arguments to be passed in registers. Regular
/// calls get their inreg marking from the front-end, so compiler-generated
/// library calls are patched up here. Walks Args in order with a budget of
/// three registers; an argument is marked only if it needs at most two
/// registers and the remaining budget covers it.
void X86TargetLowering::markInRegArguments(SelectionDAG &DAG,
                                           TargetLowering::ArgListTy& Args) const {
  if (!Subtarget->isTargetMCU() || Args.empty())
    return;

  unsigned RegsLeft = 3;
  for (auto &Arg : Args) {
    // For library functions, we do not expect any fancy types.
    unsigned Bits = DAG.getDataLayout().getTypeSizeInBits(Arg.Ty);
    unsigned RegsNeeded = (Bits + 31) / 32;

    // Only arguments that fit in two registers and in the remaining budget
    // are marked; the rest are skipped.
    if (RegsNeeded <= 2 && RegsNeeded <= RegsLeft) {
      Arg.isInReg = true;
      RegsLeft -= RegsNeeded;
      if (RegsLeft == 0)
        break;
    }
  }
}
3 changes: 0 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
@@ -927,9 +927,6 @@ namespace llvm {

bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;

void markInRegArguments(SelectionDAG &DAG, TargetLowering::ArgListTy& Args)
const override;

protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
103 changes: 102 additions & 1 deletion llvm/test/CodeGen/X86/mcu-abi.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,112 @@
; RUN: llc < %s -mtriple=i686-pc-elfiamcu | FileCheck %s

%struct.st12_t = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }

; CHECK-LABEL: test_ints:
; CHECK: addl %edx, %eax
; CHECK-NEXT: imull %ecx, %eax
; CHECK-NEXT: addl 4(%esp), %eax
; CHECK-NEXT: retl
define i32 @test_ints(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
entry:
%r1 = add i32 %b, %a
%r2 = mul i32 %c, %r1
%r3 = add i32 %d, %r2
ret i32 %r3
}

; CHECK-LABEL: test_floats:
; CHECK: addl %edx, %eax
; CHECK-NEXT: imull %ecx, %eax
; CHECK-NEXT: addl 4(%esp), %eax
; CHECK-NEXT: retl
define i32 @test_floats(i32 %a, i32 %b, float %c, float %d) #0 {
entry:
%ci = bitcast float %c to i32
%di = bitcast float %d to i32
%r1 = add i32 %b, %a
%r2 = mul i32 %ci, %r1
%r3 = add i32 %di, %r2
ret i32 %r3
}

; CHECK-LABEL: test_doubles:
; CHECK: addl 4(%esp), %eax
; CHECK-NEXT: adcl 8(%esp), %edx
; CHECK-NEXT: retl
define double @test_doubles(double %d1, double %d2) #0 {
entry:
%d1i = bitcast double %d1 to i64
%d2i = bitcast double %d2 to i64
%r = add i64 %d1i, %d2i
%rd = bitcast i64 %r to double
ret double %rd
}

; CHECK-LABEL: test_mixed_doubles:
; CHECK: addl %ecx, %eax
; CHECK-NEXT: adcl $0, %edx
; CHECK-NEXT: retl
define double @test_mixed_doubles(double %d2, i32 %i) #0 {
entry:
%iext = zext i32 %i to i64
%d2i = bitcast double %d2 to i64
%r = add i64 %iext, %d2i
%rd = bitcast i64 %r to double
ret double %rd
}

; CHECK-LABEL: ret_large_struct:
; CHECK: pushl %esi
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: leal 8(%esp), %edx
; CHECK-NEXT: movl $48, %ecx
; CHECK-NEXT: calll memcpy
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: popl %esi
; CHECK-NOT: retl $4
; CHECK-NEXT: retl
define void @ret_large_struct(%struct.st12_t* noalias nocapture sret %agg.result, %struct.st12_t* byval nocapture readonly align 4 %r) #0 {
entry:
%0 = bitcast %struct.st12_t* %agg.result to i8*
%1 = bitcast %struct.st12_t* %r to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 48, i32 1, i1 false)
ret void
}

; CHECK-LABEL: var_args:
; CHECK: movl 4(%esp), %eax
; CHECK-NEXT: retl
define i32 @var_args(i32 %i1, ...) #0 {
entry:
ret i32 %i1
}

; CHECK-LABEL: test_lib_args:
; CHECK: movl %edx, %eax
; CHECK: calll __fixsfsi
define i32 @test_lib_args(float inreg %a, float inreg %b) #0 {
define i32 @test_lib_args(float %a, float %b) #0 {
%ret = fptosi float %b to i32
ret i32 %ret
}

; CHECK-LABEL: test_fp128:
; CHECK: movl (%eax), %e[[CX:..]]
; CHECK-NEXT: movl 4(%eax), %e[[DX:..]]
; CHECK-NEXT: movl 8(%eax), %e[[SI:..]]
; CHECK-NEXT: movl 12(%eax), %e[[AX:..]]
; CHECK-NEXT: movl %e[[AX]], 12(%esp)
; CHECK-NEXT: movl %e[[SI]], 8(%esp)
; CHECK-NEXT: movl %e[[DX]], 4(%esp)
; CHECK-NEXT: movl %e[[CX]], (%esp)
; CHECK-NEXT: calll __fixtfsi
define i32 @test_fp128(fp128* %ptr) #0 {
%v = load fp128, fp128* %ptr
%ret = fptosi fp128 %v to i32
ret i32 %ret
}

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1

attributes #0 = { nounwind "use-soft-float"="true"}
attributes #1 = { nounwind argmemonly }

0 comments on commit 2ea81ba

Please sign in to comment.