diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9858,6 +9858,7 @@ Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; + CLI.getArgs()[0].IndirectType = CLI.RetTy; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td --- a/llvm/lib/Target/Sparc/SparcCallingConv.td +++ b/llvm/lib/Target/Sparc/SparcCallingConv.td @@ -125,10 +125,14 @@ def RetCC_Sparc64 : CallingConv<[ // A single f32 return value always goes in %f0. The ABI doesn't specify what // happens to multiple f32 return values outside a struct. - CCIfType<[f32], CCCustom<"CC_Sparc64_Half">>, + CCIfType<[f32], CCCustom<"RetCC_Sparc64_Half">>, - // Otherwise, return values are passed exactly like arguments. - CCDelegateTo + // Otherwise, return values are passed exactly like arguments, except that + // returns that are too big to fit into the registers are passed as an sret + // instead. + CCIfInReg>>, + CCIfType<[i32], CCPromoteToType>, + CCCustom<"RetCC_Sparc64_Full"> ]>; // Callee-saved registers are handled by the register window mechanism. 
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -144,6 +144,11 @@ SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -101,9 +101,9 @@ } // Allocate a full-sized argument for the 64-bit ABI. -static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { +static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { assert((LocVT == MVT::f32 || LocVT == MVT::f128 || LocVT.getSizeInBits() == 64) && "Can't handle non-64 bits locations"); @@ -133,6 +133,11 @@ return true; } + // Bail out if this is a return CC and we run out of registers to place + // values into. + if (IsReturn) + return false; + // This argument goes on the stack in an 8-byte slot. // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to // the right-aligned float. The first 4 bytes of the stack slot are undefined. @@ -146,9 +151,9 @@ // Allocate a half-sized argument for the 64-bit ABI. // // This is used when passing { float, int } structs by value in registers. 
-static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { +static bool Analyze_CC_Sparc64_Half(bool IsReturn, unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations"); unsigned Offset = State.AllocateStack(4, Align(4)); @@ -174,10 +179,43 @@ return true; } + // Bail out if this is a return CC and we run out of registers to place + // values into. + if (IsReturn) + return false; + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return true; } +static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Full(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Half(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool RetCC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Full(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool RetCC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Half(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + #include "SparcGenCallingConv.inc" // The calling conventions in SparcCallingConv.td are described in terms of the @@ -191,6 +229,15 @@ return Reg; } +bool SparcTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) 
const { + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, Subtarget->is64Bit() ? RetCC_Sparc64 + : RetCC_Sparc32); +} + SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -1030,6 +1077,7 @@ // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { + assert(RVLocs[i].isRegLoc() && "Can only return in registers!"); if (RVLocs[i].getLocVT() == MVT::v2i32) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2i32); SDValue Lo = DAG.getCopyFromReg( @@ -1344,6 +1392,7 @@ // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); unsigned Reg = toCallerWindow(VA.getLocReg()); // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll --- a/llvm/test/CodeGen/SPARC/64abi.ll +++ b/llvm/test/CodeGen/SPARC/64abi.ll @@ -293,33 +293,6 @@ ret void } -; Structs up to 32 bytes in size can be returned in registers. 
-; CHECK-LABEL: ret_i64_pair: -; CHECK: ldx [%i2], %i0 -; CHECK: ldx [%i3], %i1 -define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) { - %r1 = load i64, i64* %p - %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0 - store i64 0, i64* %p - %r2 = load i64, i64* %q - %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1 - ret { i64, i64 } %rv2 -} - -; CHECK-LABEL: call_ret_i64_pair: -; CHECK: call ret_i64_pair -; CHECK: stx %o0, [%i0] -; CHECK: stx %o1, [%i0] -define void @call_ret_i64_pair(i64* %i0) { - %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef, - i64* undef, i64* undef) - %e0 = extractvalue { i64, i64 } %rv, 0 - store volatile i64 %e0, i64* %i0 - %e1 = extractvalue { i64, i64 } %rv, 1 - store i64 %e1, i64* %i0 - ret void -} - ; This is not a C struct, the i32 member uses 8 bytes, but the float only 4. ; CHECK-LABEL: ret_i32_float_pair: ; CHECK: ld [%i2], %i0 diff --git a/llvm/test/CodeGen/SPARC/bigreturn.ll b/llvm/test/CodeGen/SPARC/bigreturn.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/bigreturn.ll @@ -0,0 +1,254 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=sparc -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC %s +; RUN: llc < %s -mtriple=sparc64 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC64 %s + +;; Structs up to six registers in size can be returned in registers. +;; Note that the maximum return size and member placement is NOT +;; compatible with the C ABI - see SparcCallingConv.td. +define { i32, i32 } @ret_i32_pair(i32 %a0, i32 %a1, i32* %p, i32* %q) { +; SPARC-LABEL: ret_i32_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! 
%bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: ld [%i2], %i0 +; SPARC-NEXT: st %g0, [%i2] +; SPARC-NEXT: ld [%i3], %i1 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: ret_i32_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: ld [%i2], %i0 +; SPARC64-NEXT: st %g0, [%i2] +; SPARC64-NEXT: ld [%i3], %i1 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %r1 = load i32, i32* %p + %rv1 = insertvalue { i32, i32 } undef, i32 %r1, 0 + store i32 0, i32* %p + %r2 = load i32, i32* %q + %rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 1 + ret { i32, i32 } %rv2 +} + +define void @call_ret_i32_pair(i32* %i0) { +; SPARC-LABEL: call_ret_i32_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: call ret_i32_pair +; SPARC-NEXT: nop +; SPARC-NEXT: st %o0, [%i0] +; SPARC-NEXT: st %o1, [%i0] +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i32_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! 
%bb.0: +; SPARC64-NEXT: save %sp, -176, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: call ret_i32_pair +; SPARC64-NEXT: nop +; SPARC64-NEXT: st %o0, [%i0] +; SPARC64-NEXT: st %o1, [%i0] +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %rv = call { i32, i32 } @ret_i32_pair(i32 undef, i32 undef, + i32* undef, i32* undef) + %e0 = extractvalue { i32, i32 } %rv, 0 + store volatile i32 %e0, i32* %i0 + %e1 = extractvalue { i32, i32 } %rv, 1 + store i32 %e1, i32* %i0 + ret void +} + +;; Functions returning structs more than six registers' worth of space +;; should be automatically treated as an sret function. +declare { [16 x i32] } @ret_i32_arr(i32 %input) + +define i32 @call_ret_i32_arr(i32 %0) { +; SPARC-LABEL: call_ret_i32_arr: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -160, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: add %fp, -64, %i1 +; SPARC-NEXT: st %i1, [%sp+64] +; SPARC-NEXT: mov %i0, %o0 +; SPARC-NEXT: call ret_i32_arr +; SPARC-NEXT: nop +; SPARC-NEXT: unimp 64 +; SPARC-NEXT: ld [%fp+-4], %i0 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i32_arr: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -240, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: add %fp, 1983, %o0 +; SPARC64-NEXT: mov %i0, %o1 +; SPARC64-NEXT: call ret_i32_arr +; SPARC64-NEXT: nop +; SPARC64-NEXT: ld [%fp+2043], %i0 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = call { [16 x i32] } @ret_i32_arr(i32 %0) + %3 = extractvalue { [16 x i32] } %2, 0 + %4 = extractvalue [16 x i32] %3, 15 + ret i32 %4 +} + +;; Structs up to six registers in size can be returned in registers. 
+;; Note that the maximum return size and member placement is NOT +;; compatible with the C ABI - see SparcCallingConv.td. +define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) { +; SPARC-LABEL: ret_i64_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: mov %g0, %i4 +; SPARC-NEXT: ldd [%i2], %i0 +; SPARC-NEXT: mov %i4, %i5 +; SPARC-NEXT: std %i4, [%i2] +; SPARC-NEXT: ldd [%i3], %i2 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: ret_i64_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: ldx [%i2], %i0 +; SPARC64-NEXT: stx %g0, [%i2] +; SPARC64-NEXT: ldx [%i3], %i1 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %r1 = load i64, i64* %p + %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0 + store i64 0, i64* %p + %r2 = load i64, i64* %q + %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1 + ret { i64, i64 } %rv2 +} + +define void @call_ret_i64_pair(i64* %i0) { +; SPARC-LABEL: call_ret_i64_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: call ret_i64_pair +; SPARC-NEXT: nop +; SPARC-NEXT: ! kill: def $o0 killed $o0 killed $o0_o1 def $o0_o1 +; SPARC-NEXT: ! kill: def $o2 killed $o2 killed $o2_o3 def $o2_o3 +; SPARC-NEXT: ! kill: def $o1 killed $o1 killed $o0_o1 def $o0_o1 +; SPARC-NEXT: std %o0, [%i0] +; SPARC-NEXT: ! 
kill: def $o3 killed $o3 killed $o2_o3 def $o2_o3 +; SPARC-NEXT: std %o2, [%i0] +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i64_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -176, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: call ret_i64_pair +; SPARC64-NEXT: nop +; SPARC64-NEXT: stx %o0, [%i0] +; SPARC64-NEXT: stx %o1, [%i0] +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef, + i64* undef, i64* undef) + %e0 = extractvalue { i64, i64 } %rv, 0 + store volatile i64 %e0, i64* %i0 + %e1 = extractvalue { i64, i64 } %rv, 1 + store i64 %e1, i64* %i0 + ret void +} + +;; Functions returning structs more than six registers' worth of space +;; should be automatically treated as an sret function. +declare { [16 x i64] } @ret_i64_arr(i64 %input) + +define i64 @call_ret_i64_arr(i64 %0) { +; SPARC-LABEL: call_ret_i64_arr: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -224, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: add %fp, -128, %i2 +; SPARC-NEXT: st %i2, [%sp+64] +; SPARC-NEXT: mov %i0, %o0 +; SPARC-NEXT: mov %i1, %o1 +; SPARC-NEXT: call ret_i64_arr +; SPARC-NEXT: nop +; SPARC-NEXT: unimp 128 +; SPARC-NEXT: ldd [%fp+-8], %i0 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i64_arr: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! 
%bb.0: +; SPARC64-NEXT: save %sp, -304, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: add %fp, 1919, %o0 +; SPARC64-NEXT: mov %i0, %o1 +; SPARC64-NEXT: call ret_i64_arr +; SPARC64-NEXT: nop +; SPARC64-NEXT: ldx [%fp+2039], %i0 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = call { [16 x i64] } @ret_i64_arr(i64 %0) + %3 = extractvalue { [16 x i64] } %2, 0 + %4 = extractvalue [16 x i64] %3, 15 + ret i64 %4 +}