Index: llvm/trunk/include/llvm/Target/TargetCallingConv.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetCallingConv.h
+++ llvm/trunk/include/llvm/Target/TargetCallingConv.h
@@ -46,6 +46,8 @@
     static const uint64_t SplitOffs     = 11;
     static const uint64_t InAlloca      = 1ULL<<12; ///< Passed with inalloca
     static const uint64_t InAllocaOffs  = 12;
+    static const uint64_t SplitEnd      = 1ULL<<13; ///< Last part of a split
+    static const uint64_t SplitEndOffs  = 13;
     static const uint64_t OrigAlign     = 0x1FULL<<27;
     static const uint64_t OrigAlignOffs = 27;
     static const uint64_t ByValSize     = 0x3fffffffULL<<32; ///< Struct size
@@ -103,6 +105,9 @@
     bool isSplit()   const { return Flags & Split; }
     void setSplit()  { Flags |= One << SplitOffs; }
 
+    bool isSplitEnd()   const { return Flags & SplitEnd; }
+    void setSplitEnd()  { Flags |= One << SplitEndOffs; }
+
     unsigned getOrigAlign() const {
       return (unsigned)
         ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2);
Index: llvm/trunk/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h
+++ llvm/trunk/include/llvm/Target/TargetLowering.h
@@ -2453,13 +2453,6 @@
   };
 
-  // Mark inreg arguments for lib-calls. For normal calls this is done by
-  // the frontend ABI code.
-  virtual void markInRegArguments(SelectionDAG &DAG,
-                                  TargetLowering::ArgListTy &Args) const {
-    return;
-  }
-
   /// This function lowers an abstract call to a function into an actual call.
   /// This returns a pair of operands. The first element is the return value
   /// for the function (if RetTy is not VoidTy). The second element is the
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7145,8 +7145,11 @@
                                 i, j*Parts[j].getValueType().getStoreSize());
         if (NumParts > 1 && j == 0)
           MyFlags.Flags.setSplit();
-        else if (j != 0)
+        else if (j != 0) {
           MyFlags.Flags.setOrigAlign(1);
+          if (j == NumParts - 1)
+            MyFlags.Flags.setSplitEnd();
+        }
 
         CLI.Outs.push_back(MyFlags);
         CLI.OutVals.push_back(Parts[j]);
@@ -7390,8 +7393,11 @@
       if (NumRegs > 1 && i == 0)
         MyFlags.Flags.setSplit();
       // if it isn't first piece, alignment must be 1
-      else if (i > 0)
+      else if (i > 0) {
         MyFlags.Flags.setOrigAlign(1);
+        if (i == NumRegs - 1)
+          MyFlags.Flags.setSplitEnd();
+      }
       Ins.push_back(MyFlags);
     }
     if (NeedsRegBlock && Value == NumValues - 1)
Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -101,8 +101,6 @@
     Args.push_back(Entry);
   }
 
-  markInRegArguments(DAG, Args);
-
   if (LC == RTLIB::UNKNOWN_LIBCALL)
     report_fatal_error("Unsupported library call operation!");
   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
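Note (illustration, not part of the patch): the new SplitEnd bit lets calling-convention code recognize the last piece of an argument that was split across several parts, instead of inferring it from types. The standalone sketch below models only that bookkeeping; FlagsModel is a made-up stand-in for ISD::ArgFlagsTy, with the bit offsets taken from the TargetCallingConv.h hunk above and the marking loop mirroring the SelectionDAGBuilder change.

#include <cstdint>
#include <cstdio>
#include <vector>

struct FlagsModel {
  uint64_t Bits = 0;
  static constexpr uint64_t One = 1;
  static constexpr uint64_t SplitOffs = 11;     // from TargetCallingConv.h
  static constexpr uint64_t SplitEndOffs = 13;  // added by this patch
  void setSplit()         { Bits |= One << SplitOffs; }
  void setSplitEnd()      { Bits |= One << SplitEndOffs; }
  bool isSplit() const    { return Bits & (One << SplitOffs); }
  bool isSplitEnd() const { return Bits & (One << SplitEndOffs); }
};

int main() {
  // An i64 argument lowered on a 32-bit target becomes NumParts == 2 pieces.
  const unsigned NumParts = 2;
  std::vector<FlagsModel> Pieces(NumParts);
  for (unsigned j = 0; j != NumParts; ++j) {
    if (NumParts > 1 && j == 0)
      Pieces[j].setSplit();          // first piece opens the split
    else if (j != 0) {
      if (j == NumParts - 1)
        Pieces[j].setSplitEnd();     // last piece now closes it
    }
  }
  for (unsigned j = 0; j != NumParts; ++j)
    std::printf("piece %u: isSplit=%d isSplitEnd=%d\n", j,
                Pieces[j].isSplit(), Pieces[j].isSplitEnd());
  return 0;
}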
Index: llvm/trunk/lib/Target/X86/X86CallingConv.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.h
+++ llvm/trunk/lib/Target/X86/X86CallingConv.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
 #define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
 
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/IR/CallingConv.h"
 
@@ -42,6 +43,64 @@
   return false;
 }
 
+inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
+                               MVT &LocVT,
+                               CCValAssign::LocInfo &LocInfo,
+                               ISD::ArgFlagsTy &ArgFlags,
+                               CCState &State) {
+  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
+  // not to split i64 and double between a register and the stack.
+  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+  static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
+
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // If this is the first part of a double/i64/i128, or if we're already
+  // in the middle of a split, add to the pending list. If this is not
+  // the end of the split, return, otherwise go on to process the pending
+  // list.
+  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+    PendingMembers.push_back(
+        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+    if (!ArgFlags.isSplitEnd())
+      return true;
+  }
+
+  // If there are no pending members, we are not in the middle of a split,
+  // so do the usual inreg stuff.
+  if (PendingMembers.empty()) {
+    if (unsigned Reg = State.AllocateReg(RegList)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return true;
+    }
+    return false;
+  }
+
+  assert(ArgFlags.isSplitEnd());
+
+  // We now have the entire original argument in PendingMembers, so decide
+  // whether to use registers or the stack.
+  // Per the MCU ABI:
+  // a) To use registers, we need to have enough of them free to contain
+  //    the entire argument.
+  // b) We never want to use more than 2 registers for a single argument.
+
+  unsigned FirstFree = State.getFirstUnallocated(RegList);
+  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+  for (auto &It : PendingMembers) {
+    if (UseRegs)
+      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+    else
+      It.convertToMem(State.AllocateStack(4, 4));
+    State.addLoc(It);
+  }
+
+  PendingMembers.clear();
+
+  return true;
+}
+
 } // End llvm namespace
 
 #endif
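Note (illustration, not part of the patch): the rule above is all-or-nothing per argument: every piece goes in registers only if at most two registers are needed and that many are still free, otherwise every piece goes on the stack. The standalone sketch below replays that decision for two example signatures; assignArg and the piece counts are invented for the sketch, and the single-piece case is simplified (the real code sends it through the ordinary inreg path instead).

#include <algorithm>
#include <cstdio>

static const unsigned NumRegs = 3;  // models {EAX, EDX, ECX}
static const char *RegNames[NumRegs] = {"eax", "edx", "ecx"};

// Assign one argument made of 'Pieces' 32-bit parts, with 'FirstFree'
// registers already taken. Returns the updated FirstFree.
static unsigned assignArg(const char *Name, unsigned Pieces, unsigned FirstFree) {
  // Mirrors: PendingMembers.size() <= std::min(2U, NumRegs - FirstFree)
  bool UseRegs = Pieces <= std::min(2u, NumRegs - FirstFree);
  std::printf("%-9s ->", Name);
  for (unsigned i = 0; i != Pieces; ++i) {
    if (UseRegs)
      std::printf(" %%%s", RegNames[FirstFree++]);
    else
      std::printf(" stack");
  }
  std::printf("\n");
  return FirstFree;
}

int main() {
  // f(i64, i32): the i64 takes eax:edx, the i32 takes ecx.
  unsigned Free = 0;
  Free = assignArg("i64", 2, Free);
  Free = assignArg("i32", 1, Free);

  // g(double, double): only ecx is left for the second double, so the whole
  // argument goes on the stack; compare the test_doubles CHECK lines below.
  Free = 0;
  Free = assignArg("double#1", 2, Free);
  Free = assignArg("double#2", 2, Free);
  return 0;
}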
Index: llvm/trunk/lib/Target/X86/X86CallingConv.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td
@@ -592,6 +592,23 @@
   CCDelegateTo<CC_X86_32_Common>
 ]>;
 
+def CC_X86_32_MCU : CallingConv<[
+  // Handles byval parameters. Note that, like FastCC, we can't rely on
+  // the delegation to CC_X86_32_Common because that happens after code that
+  // puts arguments in registers.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
+  // Promote i1/i8/i16 arguments to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // If the call is not a vararg call, some arguments may be passed
+  // in integer registers.
+  CCIfNotVarArg<CCCustom<"CC_X86_32_MCUInReg">>,
+
+  // Otherwise, same as everything else.
+  CCDelegateTo<CC_X86_32_Common>
+]>;
+
 def CC_X86_32_FastCall : CallingConv<[
   // Promote i1/i8/i16 arguments to i32.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -753,6 +770,7 @@
 // This is the root argument convention for the X86-32 backend.
 def CC_X86_32 : CallingConv<[
+  CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
   CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
   CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
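Note (illustration, not part of the patch): CC_X86_32 is a first-match chain, and the new isTargetMCU() entry is placed ahead of the per-convention entries, so on an MCU target every calling convention is routed to CC_X86_32_MCU. A rough hand-written equivalent of that ordering, with the remaining table entries elided and the names reproduced only for the sketch:

#include <cstdio>

enum class Conv { C, FastCall, VectorCall, ThisCall };

// First-match dispatch, in the same order as the TableGen entries above.
static const char *rootCC_X86_32(bool IsMCU, Conv CC) {
  if (IsMCU)                     // CCIfSubtarget<"isTargetMCU()", ...>
    return "CC_X86_32_MCU";
  if (CC == Conv::FastCall)      // CCIfCC<"CallingConv::X86_FastCall", ...>
    return "CC_X86_32_FastCall";
  if (CC == Conv::VectorCall)
    return "CC_X86_32_VectorCall";
  if (CC == Conv::ThisCall)
    return "CC_X86_32_ThisCall";
  return "CC_X86_32_C";          // remaining entries elided; table falls through
}

int main() {
  std::printf("MCU, fastcall     -> %s\n", rootCC_X86_32(true, Conv::FastCall));
  std::printf("non-MCU, fastcall -> %s\n", rootCC_X86_32(false, Conv::FastCall));
  return 0;
}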
Index: llvm/trunk/lib/Target/X86/X86FastISel.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp
@@ -1098,12 +1098,11 @@
       RetRegs.push_back(VA.getLocReg());
   }
 
-  // The x86-64 ABI for returning structs by value requires that we copy
-  // the sret argument into %rax for the return. We saved the argument into
-  // a virtual register in the entry block, so now we copy the value out
-  // and into %rax. We also do the same with %eax for Win32.
-  if (F.hasStructRetAttr() &&
-      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+  // All x86 ABIs require that for returning structs by value we copy
+  // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // We saved the argument into a virtual register in the entry block,
+  // so now we copy the value out and into %rax/%eax.
+  if (F.hasStructRetAttr()) {
     unsigned Reg = X86MFInfo->getSRetReturnReg();
     assert(Reg &&
            "SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -2820,7 +2819,7 @@
   if (CS)
     if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
-        CS->paramHasAttr(1, Attribute::InReg))
+        CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
       return 0;
 
   return 4;
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -927,9 +927,6 @@
     bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
 
-    void markInRegArguments(SelectionDAG &DAG, TargetLowering::ArgListTy& Args)
-      const override;
-
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -2447,28 +2447,28 @@
   StackStructReturn
 };
 
 static StructReturnType
-callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
   if (Outs.empty())
     return NotStructReturn;
 
   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
   if (!Flags.isSRet())
     return NotStructReturn;
-  if (Flags.isInReg())
+  if (Flags.isInReg() || IsMCU)
     return RegStructReturn;
   return StackStructReturn;
 }
 
 /// Determines whether a function uses struct return semantics.
 static StructReturnType
-argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
   if (Ins.empty())
     return NotStructReturn;
 
   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
   if (!Flags.isSRet())
     return NotStructReturn;
-  if (Flags.isInReg())
+  if (Flags.isInReg() || IsMCU)
     return RegStructReturn;
   return StackStructReturn;
 }
@@ -2945,7 +2945,7 @@
   // If this is an sret function, the return should pop the hidden pointer.
   if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
       !Subtarget->getTargetTriple().isOSMSVCRT() &&
-      argsAreStructReturn(Ins) == StackStructReturn)
+      argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
     FuncInfo->setBytesToPopOnReturn(4);
 }
@@ -3065,7 +3065,7 @@
   MachineFunction &MF = DAG.getMachineFunction();
   bool Is64Bit = Subtarget->is64Bit();
   bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
-  StructReturnType SR = callIsStructReturn(Outs);
+  StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
   bool IsSibcall = false;
   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
@@ -28661,27 +28661,3 @@
       Attribute::MinSize);
   return OptSize && !VT.isVector();
 }
-
-void X86TargetLowering::markInRegArguments(SelectionDAG &DAG,
-                                           TargetLowering::ArgListTy& Args) const {
-  // The MCU psABI requires some arguments to be passed in-register.
-  // For regular calls, the inreg arguments are marked by the front-end.
-  // However, for compiler generated library calls, we have to patch this
-  // up here.
-  if (!Subtarget->isTargetMCU() || !Args.size())
-    return;
-
-  unsigned FreeRegs = 3;
-  for (auto &Arg : Args) {
-    // For library functions, we do not expect any fancy types.
-    unsigned Size = DAG.getDataLayout().getTypeSizeInBits(Arg.Ty);
-    unsigned SizeInRegs = (Size + 31) / 32;
-    if (SizeInRegs > 2 || SizeInRegs > FreeRegs)
-      continue;
-
-    Arg.isInReg = true;
-    FreeRegs -= SizeInRegs;
-    if (!FreeRegs)
-      break;
-  }
-}
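Note (illustration, not part of the patch): with these changes an sret call on MCU is classified as RegStructReturn even though the pointer is no longer marked inreg by the front end, so the sret pointer comes back in %eax and the callee does not pop the hidden pointer (plain retl rather than retl $4, which the ret_large_struct test below checks). A stripped-down model of that classification, with Flags standing in for ISD::ArgFlagsTy:

#include <cstdio>

enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn };

struct Flags { bool SRet = false; bool InReg = false; };  // stand-in for ISD::ArgFlagsTy

// Mirrors callIsStructReturn/argsAreStructReturn above, minus the empty check.
static StructReturnType classify(const Flags &F, bool IsMCU) {
  if (!F.SRet)
    return NotStructReturn;
  if (F.InReg || IsMCU)
    return RegStructReturn;
  return StackStructReturn;
}

int main() {
  Flags SretOnly;
  SretOnly.SRet = true;
  std::printf("sret, non-MCU -> %d (StackStructReturn)\n", classify(SretOnly, false));
  std::printf("sret, MCU     -> %d (RegStructReturn)\n", classify(SretOnly, true));
  return 0;
}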
Index: llvm/trunk/test/CodeGen/X86/mcu-abi.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/mcu-abi.ll
+++ llvm/trunk/test/CodeGen/X86/mcu-abi.ll
@@ -1,11 +1,112 @@
 ; RUN: llc < %s -mtriple=i686-pc-elfiamcu | FileCheck %s
 
+%struct.st12_t = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+; CHECK-LABEL: test_ints:
+; CHECK: addl %edx, %eax
+; CHECK-NEXT: imull %ecx, %eax
+; CHECK-NEXT: addl 4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_ints(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+entry:
+  %r1 = add i32 %b, %a
+  %r2 = mul i32 %c, %r1
+  %r3 = add i32 %d, %r2
+  ret i32 %r3
+}
+
+; CHECK-LABEL: test_floats:
+; CHECK: addl %edx, %eax
+; CHECK-NEXT: imull %ecx, %eax
+; CHECK-NEXT: addl 4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_floats(i32 %a, i32 %b, float %c, float %d) #0 {
+entry:
+  %ci = bitcast float %c to i32
+  %di = bitcast float %d to i32
+  %r1 = add i32 %b, %a
+  %r2 = mul i32 %ci, %r1
+  %r3 = add i32 %di, %r2
+  ret i32 %r3
+}
+
+; CHECK-LABEL: test_doubles:
+; CHECK: addl 4(%esp), %eax
+; CHECK-NEXT: adcl 8(%esp), %edx
+; CHECK-NEXT: retl
+define double @test_doubles(double %d1, double %d2) #0 {
+entry:
+  %d1i = bitcast double %d1 to i64
+  %d2i = bitcast double %d2 to i64
+  %r = add i64 %d1i, %d2i
+  %rd = bitcast i64 %r to double
+  ret double %rd
+}
+
+; CHECK-LABEL: test_mixed_doubles:
+; CHECK: addl %ecx, %eax
+; CHECK-NEXT: adcl $0, %edx
+; CHECK-NEXT: retl
+define double @test_mixed_doubles(double %d2, i32 %i) #0 {
+entry:
+  %iext = zext i32 %i to i64
+  %d2i = bitcast double %d2 to i64
+  %r = add i64 %iext, %d2i
+  %rd = bitcast i64 %r to double
+  ret double %rd
+}
+
+; CHECK-LABEL: ret_large_struct:
+; CHECK: pushl %esi
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: leal 8(%esp), %edx
+; CHECK-NEXT: movl $48, %ecx
+; CHECK-NEXT: calll memcpy
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: popl %esi
+; CHECK-NOT: retl $4
+; CHECK-NEXT: retl
+define void @ret_large_struct(%struct.st12_t* noalias nocapture sret %agg.result, %struct.st12_t* byval nocapture readonly align 4 %r) #0 {
+entry:
+  %0 = bitcast %struct.st12_t* %agg.result to i8*
+  %1 = bitcast %struct.st12_t* %r to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 48, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: var_args:
+; CHECK: movl 4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @var_args(i32 %i1, ...) #0 {
+entry:
+  ret i32 %i1
+}
+
 ; CHECK-LABEL: test_lib_args:
 ; CHECK: movl %edx, %eax
 ; CHECK: calll __fixsfsi
-define i32 @test_lib_args(float inreg %a, float inreg %b) #0 {
+define i32 @test_lib_args(float %a, float %b) #0 {
   %ret = fptosi float %b to i32
   ret i32 %ret
 }
 
+; CHECK-LABEL: test_fp128:
+; CHECK: movl (%eax), %e[[CX:..]]
+; CHECK-NEXT: movl 4(%eax), %e[[DX:..]]
+; CHECK-NEXT: movl 8(%eax), %e[[SI:..]]
+; CHECK-NEXT: movl 12(%eax), %e[[AX:..]]
+; CHECK-NEXT: movl %e[[AX]], 12(%esp)
+; CHECK-NEXT: movl %e[[SI]], 8(%esp)
+; CHECK-NEXT: movl %e[[DX]], 4(%esp)
+; CHECK-NEXT: movl %e[[CX]], (%esp)
+; CHECK-NEXT: calll __fixtfsi
+define i32 @test_fp128(fp128* %ptr) #0 {
+  %v = load fp128, fp128* %ptr
+  %ret = fptosi fp128 %v to i32
+  ret i32 %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
 attributes #0 = { nounwind "use-soft-float"="true"}
+attributes #1 = { nounwind argmemonly }