[X86] Better support for the MCU psABI (LLVM part)

This adds support for the MCU psABI in a way different from r251223 and r251224, basically reverting most of these two patches. The problem with the approach taken in r251223/4 is that it only handled libcalls that originated from the backend. However, the mid-end also inserts quite a few libcalls and assumes these use the platform's default calling convention. The previous patch tried to insert inregs when necessary both in the FE and, somewhat hackily, in the CG. Instead, we now define a new default calling convention for the MCU, which doesn't use inreg marking at all, similarly to what x86-64 does. Differential Revision: http://reviews.llvm.org/D15054 llvm-svn: 256494
llvm · Dec 28, 2015 · 2ea81ba · 2ea81ba
1 parent 175a7cb
commit 2ea81ba
Showing 10 changed files with 204 additions and 52 deletions.
diff --git a/llvm/include/llvm/Target/TargetCallingConv.h b/llvm/include/llvm/Target/TargetCallingConv.h
@@ -46,6 +46,8 @@ namespace ISD {
     static const uint64_t SplitOffs      = 11;
     static const uint64_t InAlloca       = 1ULL<<12; ///< Passed with inalloca
     static const uint64_t InAllocaOffs   = 12;
+    static const uint64_t SplitEnd       = 1ULL<<13; ///< Last part of a split
+    static const uint64_t SplitEndOffs   = 13;
     static const uint64_t OrigAlign      = 0x1FULL<<27;
     static const uint64_t OrigAlignOffs  = 27;
     static const uint64_t ByValSize      = 0x3fffffffULL<<32; ///< Struct size
@@ -103,6 +105,9 @@ namespace ISD {
     bool isSplit()   const { return Flags & Split; }
     void setSplit()  { Flags |= One << SplitOffs; }
 
+    bool isSplitEnd()   const { return Flags & SplitEnd; }
+    void setSplitEnd()  { Flags |= One << SplitEndOffs; }
+
     unsigned getOrigAlign() const {
       return (unsigned)
         ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2);

diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
@@ -2453,13 +2453,6 @@ class TargetLowering : public TargetLoweringBase {
 
   };
 
-  // Mark inreg arguments for lib-calls. For normal calls this is done by
-  // the frontend ABI code.
-  virtual void markInRegArguments(SelectionDAG &DAG, 
-                 TargetLowering::ArgListTy &Args) const {
-    return;
-  }
-
   /// This function lowers an abstract call to a function into an actual call.
   /// This returns a pair of operands.  The first element is the return value
   /// for the function (if RetTy is not VoidTy).  The second element is the

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7145,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
                                i, j*Parts[j].getValueType().getStoreSize());
         if (NumParts > 1 && j == 0)
           MyFlags.Flags.setSplit();
-        else if (j != 0)
+        else if (j != 0) {
           MyFlags.Flags.setOrigAlign(1);
+          if (j == NumParts - 1)
+            MyFlags.Flags.setSplitEnd();
+        }
 
         CLI.Outs.push_back(MyFlags);
         CLI.OutVals.push_back(Parts[j]);
@@ -7390,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         if (NumRegs > 1 && i == 0)
           MyFlags.Flags.setSplit();
         // if it isn't first piece, alignment must be 1
-        else if (i > 0)
+        else if (i > 0) {
           MyFlags.Flags.setOrigAlign(1);
+          if (i == NumRegs - 1)
+            MyFlags.Flags.setSplitEnd();
+        }
         Ins.push_back(MyFlags);
       }
       if (NeedsRegBlock && Value == NumValues - 1)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -101,8 +101,6 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
     Args.push_back(Entry);
   }
 
-  markInRegArguments(DAG, Args);
-
   if (LC == RTLIB::UNKNOWN_LIBCALL)
     report_fatal_error("Unsupported library call operation!");
   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),

diff --git a/llvm/lib/Target/X86/X86CallingConv.h b/llvm/lib/Target/X86/X86CallingConv.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
 #define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
 
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/IR/CallingConv.h"
 
@@ -42,6 +43,64 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
   return false;
 }
 
+inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
+                                         MVT &LocVT,
+                                         CCValAssign::LocInfo &LocInfo,
+                                         ISD::ArgFlagsTy &ArgFlags,
+                                         CCState &State) {
+  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
+  // not to split i64 and double between a register and stack
+  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+  static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
+
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // If this is the first part of an double/i64/i128, or if we're already
+  // in the middle of a split, add to the pending list. If this is not
+  // the end of the split, return, otherwise go on to process the pending
+  // list
+  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+    PendingMembers.push_back(
+        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+    if (!ArgFlags.isSplitEnd())
+      return true;
+  }
+
+  // If there are no pending members, we are not in the middle of a split,
+  // so do the usual inreg stuff.
+  if (PendingMembers.empty()) {
+    if (unsigned Reg = State.AllocateReg(RegList)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return true;
+    }
+    return false;
+  }
+
+  assert(ArgFlags.isSplitEnd());
+
+  // We now have the entire original argument in PendingMembers, so decide
+  // whether to use registers or the stack.
+  // Per the MCU ABI:
+  // a) To use registers, we need to have enough of them free to contain
+  // the entire argument.
+  // b) We never want to use more than 2 registers for a single argument.
+
+  unsigned FirstFree = State.getFirstUnallocated(RegList);
+  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+  for (auto &It : PendingMembers) {
+    if (UseRegs)
+      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+    else
+      It.convertToMem(State.AllocateStack(4, 4));
+    State.addLoc(It);
+  }
+
+  PendingMembers.clear();
+
+  return true;
+}
+
 } // End llvm namespace
 
 #endif

diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
@@ -592,6 +592,23 @@ def CC_X86_32_C : CallingConv<[
   CCDelegateTo<CC_X86_32_Common>
 ]>;
 
+def CC_X86_32_MCU : CallingConv<[
+  // Handles byval parameters.  Note that, like FastCC, we can't rely on
+  // the delegation to CC_X86_32_Common because that happens after code that
+  // puts arguments in registers.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
+  // Promote i1/i8/i16 arguments to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // If the call is not a vararg call, some arguments may be passed
+  // in integer registers.
+  CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_MCUInReg">>>,
+
+  // Otherwise, same as everything else.
+  CCDelegateTo<CC_X86_32_Common>
+]>;
+
 def CC_X86_32_FastCall : CallingConv<[
   // Promote i1/i8/i16 arguments to i32.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -753,6 +770,7 @@ def CC_X86_64_Intr : CallingConv<[
 
 // This is the root argument convention for the X86-32 backend.
 def CC_X86_32 : CallingConv<[
+  CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
   CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
   CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1098,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
     RetRegs.push_back(VA.getLocReg());
   }
 
-  // The x86-64 ABI for returning structs by value requires that we copy
-  // the sret argument into %rax for the return. We saved the argument into
-  // a virtual register in the entry block, so now we copy the value out
-  // and into %rax. We also do the same with %eax for Win32.
-  if (F.hasStructRetAttr() &&
-      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+  // All x86 ABIs require that for returning structs by value we copy
+  // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // We saved the argument into a virtual register in the entry block,
+  // so now we copy the value out and into %rax/%eax.
+  if (F.hasStructRetAttr()) {
     unsigned Reg = X86MFInfo->getSRetReturnReg();
     assert(Reg &&
            "SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -2820,7 +2819,7 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
 
   if (CS)
     if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
-        CS->paramHasAttr(1, Attribute::InReg))
+        CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
       return 0;
 
   return 4;

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2447,28 +2447,28 @@ enum StructReturnType {
   StackStructReturn
 };
 static StructReturnType
-callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
   if (Outs.empty())
     return NotStructReturn;
 
   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
   if (!Flags.isSRet())
     return NotStructReturn;
-  if (Flags.isInReg())
+  if (Flags.isInReg() || IsMCU)
     return RegStructReturn;
   return StackStructReturn;
 }
 
 /// Determines whether a function uses struct return semantics.
 static StructReturnType
-argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
   if (Ins.empty())
     return NotStructReturn;
 
   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
   if (!Flags.isSRet())
     return NotStructReturn;
-  if (Flags.isInReg())
+  if (Flags.isInReg() || IsMCU)
     return RegStructReturn;
   return StackStructReturn;
 }
@@ -2945,7 +2945,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
     // If this is an sret function, the return should pop the hidden pointer.
     if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
         !Subtarget->getTargetTriple().isOSMSVCRT() &&
-        argsAreStructReturn(Ins) == StackStructReturn)
+        argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
       FuncInfo->setBytesToPopOnReturn(4);
   }
 
@@ -3065,7 +3065,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   MachineFunction &MF = DAG.getMachineFunction();
   bool Is64Bit        = Subtarget->is64Bit();
   bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
-  StructReturnType SR = callIsStructReturn(Outs);
+  StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
   bool IsSibcall      = false;
   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
@@ -28661,27 +28661,3 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
                                    Attribute::MinSize);
   return OptSize && !VT.isVector();
 }
-
-void X86TargetLowering::markInRegArguments(SelectionDAG &DAG,
-       TargetLowering::ArgListTy& Args) const {
-  // The MCU psABI requires some arguments to be passed in-register.
-  // For regular calls, the inreg arguments are marked by the front-end.
-  // However, for compiler generated library calls, we have to patch this
-  // up here.
-  if (!Subtarget->isTargetMCU() || !Args.size())
-    return;
-
-  unsigned FreeRegs = 3;
-  for (auto &Arg : Args) {
-    // For library functions, we do not expect any fancy types.
-    unsigned Size = DAG.getDataLayout().getTypeSizeInBits(Arg.Ty);
-    unsigned SizeInRegs = (Size + 31) / 32;
-    if (SizeInRegs > 2 || SizeInRegs > FreeRegs)
-      continue;
-
-    Arg.isInReg = true;
-    FreeRegs -= SizeInRegs;
-    if (!FreeRegs)
-      break;
-  }
-}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -927,9 +927,6 @@ namespace llvm {
 
     bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
 
-    void markInRegArguments(SelectionDAG &DAG, TargetLowering::ArgListTy& Args)
-      const override;
-
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,

diff --git a/llvm/test/CodeGen/X86/mcu-abi.ll b/llvm/test/CodeGen/X86/mcu-abi.ll
@@ -1,11 +1,112 @@
 ; RUN: llc < %s -mtriple=i686-pc-elfiamcu | FileCheck %s
 
+%struct.st12_t = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+; CHECK-LABEL: test_ints:
+; CHECK: addl    %edx, %eax
+; CHECK-NEXT: imull   %ecx, %eax
+; CHECK-NEXT: addl    4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_ints(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+entry:
+  %r1 = add i32 %b, %a
+  %r2 = mul i32 %c, %r1
+  %r3 = add i32 %d, %r2
+  ret i32 %r3
+}
+
+; CHECK-LABEL: test_floats:
+; CHECK: addl    %edx, %eax
+; CHECK-NEXT: imull   %ecx, %eax
+; CHECK-NEXT: addl    4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_floats(i32 %a, i32 %b, float %c, float %d) #0 {
+entry:
+  %ci = bitcast float %c to i32
+  %di = bitcast float %d to i32
+  %r1 = add i32 %b, %a
+  %r2 = mul i32 %ci, %r1
+  %r3 = add i32 %di, %r2
+  ret i32 %r3
+}
+
+; CHECK-LABEL: test_doubles:
+; CHECK: addl    4(%esp), %eax
+; CHECK-NEXT: adcl    8(%esp), %edx
+; CHECK-NEXT: retl
+define double @test_doubles(double %d1, double %d2) #0 {
+entry:
+    %d1i = bitcast double %d1 to i64
+    %d2i = bitcast double %d2 to i64
+    %r = add i64 %d1i, %d2i
+    %rd = bitcast i64 %r to double
+    ret double %rd
+}
+
+; CHECK-LABEL: test_mixed_doubles:
+; CHECK: addl    %ecx, %eax
+; CHECK-NEXT: adcl    $0, %edx
+; CHECK-NEXT: retl
+define double @test_mixed_doubles(double %d2, i32 %i) #0 {
+entry:
+    %iext = zext i32 %i to i64
+    %d2i = bitcast double %d2 to i64
+    %r = add i64 %iext, %d2i
+    %rd = bitcast i64 %r to double
+    ret double %rd
+}
+
+; CHECK-LABEL: ret_large_struct:
+; CHECK: pushl   %esi
+; CHECK-NEXT: movl    %eax, %esi
+; CHECK-NEXT: leal    8(%esp), %edx
+; CHECK-NEXT: movl    $48, %ecx
+; CHECK-NEXT: calll   memcpy
+; CHECK-NEXT: movl    %esi, %eax
+; CHECK-NEXT: popl    %esi
+; CHECK-NOT:  retl $4
+; CHECK-NEXT: retl
+define void @ret_large_struct(%struct.st12_t* noalias nocapture sret %agg.result, %struct.st12_t* byval nocapture readonly align 4 %r) #0 {
+entry:
+  %0 = bitcast %struct.st12_t* %agg.result to i8*
+  %1 = bitcast %struct.st12_t* %r to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 48, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: var_args:
+; CHECK: movl    4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @var_args(i32 %i1, ...) #0 {
+entry:
+  ret i32 %i1
+}
+
 ; CHECK-LABEL: test_lib_args:
 ; CHECK: movl %edx, %eax
 ; CHECK: calll __fixsfsi
-define i32 @test_lib_args(float inreg %a, float inreg %b) #0 {
+define i32 @test_lib_args(float %a, float %b) #0 {
   %ret = fptosi float %b to i32
   ret i32 %ret
 }
 
+; CHECK-LABEL: test_fp128:
+; CHECK: movl    (%eax), %e[[CX:..]]
+; CHECK-NEXT: movl    4(%eax), %e[[DX:..]]
+; CHECK-NEXT: movl    8(%eax), %e[[SI:..]]
+; CHECK-NEXT: movl    12(%eax), %e[[AX:..]]
+; CHECK-NEXT: movl    %e[[AX]], 12(%esp)
+; CHECK-NEXT: movl    %e[[SI]], 8(%esp)
+; CHECK-NEXT: movl    %e[[DX]], 4(%esp)
+; CHECK-NEXT: movl    %e[[CX]], (%esp)
+; CHECK-NEXT: calll   __fixtfsi
+define i32 @test_fp128(fp128* %ptr) #0 {
+  %v = load fp128, fp128* %ptr
+  %ret = fptosi fp128 %v to i32
+  ret i32 %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
 attributes #0 = { nounwind "use-soft-float"="true"}
+attributes #1 = { nounwind argmemonly }