Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6843,10 +6843,10 @@ assert(IsPPC64 && "PPC32 should have split i64 values."); LLVM_FALLTHROUGH; case MVT::i1: - case MVT::i32: - State.AllocateStack(PtrByteSize, PtrByteSize); + case MVT::i32: { + const unsigned Offset = State.AllocateStack(PtrByteSize, PtrByteSize); + const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { - MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; // Promote integers if needed. if (ValVT.getSizeInBits() < RegVT.getSizeInBits()) LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt @@ -6854,38 +6854,41 @@ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); } else - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); - return false; + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo)); + return false; + } case MVT::f32: case MVT::f64: { // Parameter save area (PSA) is reserved even if the float passes in fpr. const unsigned StoreSize = LocVT.getStoreSize(); // Floats are always 4-byte aligned in the PSA on AIX. // This includes f64 in 64-bit mode for ABI compatibility. - State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4); - if (unsigned Reg = State.AllocateReg(FPR)) - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - else - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); + const unsigned Offset = State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4); + unsigned FReg = State.AllocateReg(FPR); + if (FReg) + State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo)); - // AIX requires that GPRs are reserved for float arguments. - // Successfully reserved GPRs are only initialized for vararg calls. 
+ // Reserve and initialize GPRs or initialize the PSA as required. MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; for (unsigned I = 0; I < StoreSize; I += PtrByteSize) { if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { + assert(FReg && "An FPR should be available when a GPR is reserved."); if (State.isVarArg()) { + // Successfully reserved GPRs are only initialized for vararg calls. // Custom handling is required for: // f64 in PPC32 needs to be split into 2 GPRs. // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR. State.addLoc( CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); } - } else if (State.isVarArg()) { - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); + } else { + // If there are insufficient GPRs, the PSA needs to be initialized. + // Initialization occurs even if an FPR was initialized for + // compatibility with the AIX XL compiler. The full memory for the + // argument will be initialized even if a prior word is saved in GPR. 
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + break; } } @@ -6968,23 +6971,32 @@ CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize); CCInfo.AnalyzeFormalArguments(Ins, CC_AIX); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - SDValue ArgValue; - ISD::ArgFlagsTy Flags = Ins[i].Flags; + for (unsigned I = 0, E = ArgLocs.size(); I != E;) { + CCValAssign &VA = ArgLocs[I++]; + ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags; if (VA.isRegLoc()) { EVT ValVT = VA.getValVT(); MVT LocVT = VA.getLocVT(); MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy; unsigned VReg = MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64)); - ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); if (ValVT.isScalarInteger() && (ValVT.getSizeInBits() < LocVT.getSizeInBits())) { ArgValue = truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); } InVals.push_back(ArgValue); + + // For compatibility with the AIX XL compiler, the float args in the + // parameter save area are initialized even if the argument is available + // in register. The caller is required to initialize both the register + // and memory, however, the callee can choose to expect it in either. The + // memloc is dismissed here because the argument is retrieved from the + // register. + if ((ValVT == MVT::f32 || ValVT == MVT::f64) && I != E && + ArgLocs[I].isMemLoc() && ArgLocs[I].getValNo() == VA.getValNo()) + ++I; } else { report_fatal_error("Handling of formal arguments on the stack is " "unimplemented!"); @@ -7039,6 +7051,7 @@ // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); const bool IsPPC64 = Subtarget.isPPC64(); + const EVT PtrVT = getPointerTy(DAG.getDataLayout()); const unsigned PtrByteSize = IsPPC64 ? 
8 : 4; CCInfo.AllocateStack(LinkageSize, PtrByteSize); CCInfo.AnalyzeCallOperands(Outs, CC_AIX); @@ -7050,7 +7063,8 @@ // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize; - const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize; + const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize, + CCInfo.getNextStackOffset()); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass. @@ -7058,19 +7072,32 @@ SDValue CallSeqStart = Chain; SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + // Set up a copy of the stack pointer for loading and storing any + // arguments that may not fit in the registers available for argument + // passing. + const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64) + : DAG.getRegister(PPC::R1, MVT::i32); for (unsigned I = 0, E = ArgLocs.size(); I != E;) { CCValAssign &VA = ArgLocs[I++]; - if (VA.isMemLoc()) - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); - if (!VA.isRegLoc()) - report_fatal_error( - "Unexpected non-register location for function call argument."); - SDValue Arg = OutVals[VA.getValNo()]; + if (!VA.isRegLoc() && !VA.isMemLoc()) + report_fatal_error("Unexpected location for function call argument."); + + if (VA.isMemLoc()) { + SDValue PtrOff = + DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + MemOpChains.push_back( + DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); + + continue; + } + if (!VA.needsCustom()) { switch (VA.getLocInfo()) { default: @@ -7116,15 +7143,23 @@ DAG.getConstant(32, dl, MVT::i8)); RegsToPass.push_back(std::make_pair( GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32))); - assert(I != E && "A second 
custom GPR is expected!"); - CCValAssign &GPR2 = ArgLocs[I++]; - assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() && - GPR2.needsCustom() && "A second custom GPR is expected!"); - RegsToPass.push_back(std::make_pair( - GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32))); + + if (I != E) { + // If only 1 GPR was available, there will only be one custom GPR and the argument will also pass in memory. + CCValAssign &PeekArg = ArgLocs[I]; + if (PeekArg.isRegLoc() && PeekArg.getValNo() == GPR1.getValNo()) { + assert(PeekArg.needsCustom() && "A second custom GPR is expected."); + CCValAssign &GPR2 = ArgLocs[I++]; + RegsToPass.push_back(std::make_pair( + GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32))); + } + } } } + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); + // For indirect calls, we need to save the TOC base to the stack for // restoration after the call. if (!isTailCall && !isPatchPoint && Index: llvm/test/CodeGen/PowerPC/aix-cc-abi.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -447,41 +447,109 @@ ; CHECK-LABEL: name: call_test_fpr_max{{.*}} -; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) -; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: $f2 = COPY renamable $f1 -; 32BIT-NEXT: $f3 = COPY renamable $f1 -; 32BIT-NEXT: $f4 = COPY renamable $f1 -; 32BIT-NEXT: $f5 = COPY renamable $f1 -; 32BIT-NEXT: $f6 = COPY renamable $f1 -; 32BIT-NEXT: $f7 = COPY renamable $f1 -; 32BIT-NEXT: $f8 = COPY renamable $f1 -; 32BIT-NEXT: $f9 = COPY renamable $f1 -; 32BIT-NEXT: $f10 = COPY renamable $f1 -; 32BIT-NEXT: $f11 = COPY renamable $f1 -; 32BIT-NEXT: $f12 = COPY renamable $f1 -; 32BIT-NEXT: $f13 = COPY renamable $f1 -; 32BIT-NEXT: 
BL_NOP <mcsymbol .test_fpr_max>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1) -; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: $f2 = COPY renamable $f1 -; 64BIT-NEXT: $f3 = COPY renamable $f1 -; 64BIT-NEXT: $f4 = COPY renamable $f1 -; 64BIT-NEXT: $f5 = COPY renamable $f1 -; 64BIT-NEXT: $f6 = COPY renamable $f1 -; 64BIT-NEXT: $f7 = COPY renamable $f1 -; 64BIT-NEXT: $f8 = COPY renamable $f1 -; 64BIT-NEXT: $f9 = COPY renamable $f1 -; 64BIT-NEXT: $f10 = COPY renamable $f1 -; 64BIT-NEXT: $f11 = COPY renamable $f1 -; 64BIT-NEXT: $f12 = COPY renamable $f1 -; 64BIT-NEXT: $f13 = COPY renamable $f1 +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-DAG: STFD renamable $f1, 56, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 64, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 72, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 80, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 88, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 96, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 104, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 112, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 120, $r1 :: (store 8) +; 32BIT-DAG: $f2 = COPY renamable $f1 +; 32BIT-DAG: $f3 = COPY renamable $f1 +; 32BIT-DAG: $f4 
= COPY renamable $f1 +; 32BIT-DAG: $f5 = COPY renamable $f1 +; 32BIT-DAG: $f6 = COPY renamable $f1 +; 32BIT-DAG: $f7 = COPY renamable $f1 +; 32BIT-DAG: $f8 = COPY renamable $f1 +; 32BIT-DAG: $f9 = COPY renamable $f1 +; 32BIT-DAG: $f10 = COPY renamable $f1 +; 32BIT-DAG: $f11 = COPY renamable $f1 +; 32BIT-DAG: $f12 = COPY renamable $f1 +; 32BIT-DAG: $f13 = COPY renamable $f1 +; 32BIT-NEXT: BL_NOP <mcsymbol .test_fpr_max>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 +; 32BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_fpr_max: + +; ASM32PWR4: stwu 1, -128(1) +; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC2(2) +; ASM32PWR4-NEXT: lfd 1, 0([[REG]]) +; ASM32PWR4-DAG: stfd 1, 56(1) +; ASM32PWR4-DAG: stfd 1, 64(1) +; ASM32PWR4-DAG: stfd 1, 72(1) +; ASM32PWR4-DAG: stfd 1, 80(1) +; ASM32PWR4-DAG: stfd 1, 88(1) +; ASM32PWR4-DAG: stfd 1, 96(1) +; ASM32PWR4-DAG: stfd 1, 104(1) +; ASM32PWR4-DAG: stfd 1, 112(1) +; ASM32PWR4-DAG: stfd 1, 120(1) +; ASM32PWR4-DAG: fmr 2, 1 +; ASM32PWR4-DAG: fmr 3, 1 +; ASM32PWR4-DAG: fmr 4, 1 +; ASM32PWR4-DAG: fmr 5, 1 +; ASM32PWR4-DAG: fmr 6, 1 +; ASM32PWR4-DAG: fmr 7, 1 +; ASM32PWR4-DAG: fmr 8, 1 +; ASM32PWR4-DAG: fmr 9, 1 +; ASM32PWR4-DAG: fmr 10, 1 +; ASM32PWR4-DAG: fmr 11, 1 +; ASM32PWR4-DAG: fmr 12, 1 +; ASM32PWR4-DAG: fmr 13, 1 +; ASM32PWR4-NEXT: bl .test_fpr_max +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 128 + +; 64BIT: renamable $x[[REGD1ADDR:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x[[REGD1ADDR]] :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1 +; 
64BIT-DAG: STFD renamable $f1, 112, $x1 :: (store 8) +; 64BIT-DAG: STFD renamable $f1, 120, $x1 :: (store 8) +; 64BIT-DAG: STFD renamable $f1, 128, $x1 :: (store 8) +; 64BIT-DAG: STFD renamable $f1, 136, $x1 :: (store 8) +; 64BIT-DAG: STFD renamable $f1, 144, $x1 :: (store 8) +; 64BIT-DAG: $f2 = COPY renamable $f1 +; 64BIT-DAG: $f3 = COPY renamable $f1 +; 64BIT-DAG: $f4 = COPY renamable $f1 +; 64BIT-DAG: $f5 = COPY renamable $f1 +; 64BIT-DAG: $f6 = COPY renamable $f1 +; 64BIT-DAG: $f7 = COPY renamable $f1 +; 64BIT-DAG: $f8 = COPY renamable $f1 +; 64BIT-DAG: $f9 = COPY renamable $f1 +; 64BIT-DAG: $f10 = COPY renamable $f1 +; 64BIT-DAG: $f11 = COPY renamable $f1 +; 64BIT-DAG: $f12 = COPY renamable $f1 +; 64BIT-DAG: $f13 = COPY renamable $f1 ; 64BIT-NEXT: BL8_NOP <mcsymbol .test_fpr_max>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: stdu 1, -160(1) +; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC2(2) +; ASM64PWR4-NEXT: lfd 1, 0([[REG]]) +; ASM64PWR4-DAG: stfd 1, 112(1) +; ASM64PWR4-DAG: stfd 1, 120(1) +; ASM64PWR4-DAG: stfd 1, 128(1) +; ASM64PWR4-DAG: stfd 1, 136(1) +; ASM64PWR4-DAG: stfd 1, 144(1) +; ASM64PWR4-DAG: fmr 2, 1 +; ASM64PWR4-DAG: fmr 3, 1 +; ASM64PWR4-DAG: fmr 4, 1 +; ASM64PWR4-DAG: fmr 5, 1 +; ASM64PWR4-DAG: fmr 6, 1 +; ASM64PWR4-DAG: fmr 7, 1 +; ASM64PWR4-DAG: fmr 8, 1 +; ASM64PWR4-DAG: fmr 9, 1 +; ASM64PWR4-DAG: fmr 10, 1 +; ASM64PWR4-DAG: fmr 11, 1 +; ASM64PWR4-DAG: fmr 12, 1 +; ASM64PWR4-DAG: fmr 13, 1 +; ASM64PWR4-NEXT: bl .test_fpr_max +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 160 define double 
@test_fpr_max(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13) { entry: @@ -829,8 +897,8 @@ ; ASM64PWR4-NEXT: lfd 2, 0([[REG]]) ; ASM64PWR4-NEXT: li 3, 42 ; ASM64PWR4-NEXT: stfd 2, 120(1) -; ASM64PWR4-DAG: ld 4, 112(1) -; ASM64PWR4-DAG: ld 6, 120(1) +; ASM64PWR4-DAG: ld 4, 112(1) +; ASM64PWR4-DAG: ld 6, 120(1) ; ASM64PWR4-NEXT: bl .test_vararg ; ASM64PWR4-NEXT: nop @@ -878,3 +946,244 @@ ; ASM64PWR4-NEXT: lwz 4, 124(1) ; ASM64PWR4-NEXT: bl .test_vararg ; ASM64PWR4-NEXT: nop + +@c = common global i8 0, align 1 +@si = common global i16 0, align 2 +@i = common global i32 0, align 4 +@lli = common global i64 0, align 8 +@f = common global float 0.000000e+00, align 4 +@d = common global double 0.000000e+00, align 8 + +; Basic saving of integral type arguments to the parameter save area. +define void @call_test_stackarg_int() { +entry: + %0 = load i8, i8* @c, align 1 + %1 = load i16, i16* @si, align 2 + %2 = load i32, i32* @i, align 4 + %3 = load i64, i64* @lli, align 8 + %4 = load i32, i32* @i, align 4 + call void @test_stackarg_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i8 zeroext %0, i16 signext %1, i32 %2, i64 %3, i32 %4) + ret void +} + +declare void @test_stackarg_int(i32, i32, i32, i32, i32, i32, i32, i32, i8 zeroext, i16 signext, i32, i64, i32) + +; CHECK-LABEL: name: call_test_stackarg_int{{.*}} + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; 32BIT-DAG: renamable $r[[REGCADDR:[0-9]+]] = LWZtoc @c, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $r[[REGC:[0-9]+]] = LBZ 0, killed renamable $r[[REGCADDR]] :: (dereferenceable load 1 from @c) +; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load 2 from @si) +; 32BIT-DAG: renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load 4 from @i) +; 32BIT-DAG: renamable $r[[REGLLIADDR:[0-9]+]] = LWZtoc @lli, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $r[[REGLLI1:[0-9]+]] = LWZ 0, renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli, align 8) +; 32BIT-DAG: renamable $r[[REGLLI2:[0-9]+]] = LWZ 4, killed renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli + 4) +; 32BIT-NEXT: ADJCALLSTACKDOWN 80, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-DAG: STW killed renamable $r[[REGC]], 56, $r1 :: (store 4) +; 32BIT-DAG: STW killed renamable $r[[REGSI]], 60, $r1 :: (store 4) +; 32BIT-DAG: STW killed renamable $r[[REGI]], 64, $r1 :: (store 4) +; 32BIT-DAG: STW killed renamable $r[[REGLLI1]], 68, $r1 :: (store 4) +; 32BIT-DAG: STW killed renamable $r[[REGLLI2]], 72, $r1 :: (store 4) +; 32BIT-DAG: STW renamable $r[[REGI]], 76, $r1 :: (store 4) +; 32BIT-DAG: $r3 = LI 1 +; 32BIT-DAG: $r4 = LI 2 +; 32BIT-DAG: $r5 = LI 3 +; 32BIT-DAG: $r6 = LI 4 +; 32BIT-DAG: $r7 = LI 5 +; 32BIT-DAG: $r8 = LI 6 +; 32BIT-DAG: $r9 = LI 7 +; 32BIT-DAG: $r10 = LI 8 +; 32BIT-NEXT: BL_NOP <mcsymbol .test_stackarg_int>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 80, 0, implicit-def dead $r1, implicit $r1 + +; Basic saving of floating point type arguments to the 
parameter save area. +; The float and double arguments will pass in both fpr as well as parameter save area. +define void @call_test_stackarg_float() { +entry: + %0 = load float, float* @f, align 4 + %1 = load double, double* @d, align 8 + call void @test_stackarg_float(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, float %0, double %1) + ret void +} + +declare void @test_stackarg_float(i32, i32, i32, i32, i32, i32, i32, i32, float, double) + +; CHECK-LABEL: name: call_test_stackarg_float + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $f1 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load 4 from @f) +; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load 8 from @d) +; 32BIT-NEXT: ADJCALLSTACKDOWN 68, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-DAG: STFS renamable $f1, 56, $r1 :: (store 4) +; 32BIT-DAG: STFD renamable $f2, 60, $r1 :: (store 8) +; 32BIT-DAG: $r3 = LI 1 +; 32BIT-DAG: $r4 = LI 2 +; 32BIT-DAG: $r5 = LI 3 +; 32BIT-DAG: $r6 = LI 4 +; 32BIT-DAG: $r7 = LI 5 +; 32BIT-DAG: $r8 = LI 6 +; 32BIT-DAG: $r9 = LI 7 +; 32BIT-DAG: $r10 = LI 8 +; 32BIT-NEXT: BL_NOP <mcsymbol .test_stackarg_float>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 68, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_stackarg_float: + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; ASM32PWR4: stwu 1, -80(1) +; ASM32PWR4-DAG: lwz [[REGF:[0-9]+]], LC8(2) +; ASM32PWR4-DAG: lfs 1, 0([[REGF]]) +; ASM32PWR4-DAG: lwz [[REGD:[0-9]+]], LC9(2) +; ASM32PWR4-DAG: lfd 2, 0([[REGD]]) +; ASM32PWR4-DAG: stfs 1, 56(1) +; ASM32PWR4-DAG: stfd 2, 60(1) +; ASM32PWR4-DAG: li 3, 1 +; ASM32PWR4-DAG: li 4, 2 +; ASM32PWR4-DAG: li 5, 3 +; ASM32PWR4-DAG: li 6, 4 +; ASM32PWR4-DAG: li 7, 5 +; ASM32PWR4-DAG: li 8, 6 +; ASM32PWR4-DAG: li 9, 7 +; ASM32PWR4-DAG: li 10, 8 +; ASM32PWR4-NEXT: bl .test_stackarg_float +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $f1 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load 4 from @f) +; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load 8 from @d) +; 64BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-DAG: STFS renamable $f1, 112, $x1 :: (store 4) +; 64BIT-DAG: STFD renamable $f2, 120, $x1 :: (store 8) +; 64BIT-DAG: $x3 = LI8 1 +; 64BIT-DAG: $x4 = LI8 2 +; 64BIT-DAG: $x5 = LI8 3 +; 64BIT-DAG: $x6 = LI8 4 +; 64BIT-DAG: $x7 = LI8 5 +; 64BIT-DAG: $x8 = LI8 6 +; 64BIT-DAG: $x9 = LI8 7 +; 64BIT-DAG: $x10 = LI8 8 +; 64BIT-NEXT: BL8_NOP <mcsymbol .test_stackarg_float>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $f1, implicit $f2, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; ASM64PWR4: stdu 1, -128(1) +; ASM64PWR4-DAG: ld [[REGF:[0-9]+]], LC7(2) +; ASM64PWR4-DAG: lfs 1, 0([[REGF]]) +; ASM64PWR4-DAG: ld [[REGD:[0-9]+]], LC8(2) +; ASM64PWR4-DAG: lfd 2, 0([[REGD]]) +; ASM64PWR4-DAG: stfs 1, 112(1) +; ASM64PWR4-DAG: stfd 2, 120(1) +; ASM64PWR4-DAG: li 3, 1 +; ASM64PWR4-DAG: li 4, 2 +; ASM64PWR4-DAG: li 5, 3 +; ASM64PWR4-DAG: li 6, 4 +; ASM64PWR4-DAG: li 7, 5 +; ASM64PWR4-DAG: li 8, 6 +; ASM64PWR4-DAG: li 9, 7 +; ASM64PWR4-DAG: li 10, 8 +; ASM64PWR4-NEXT: bl .test_stackarg_float +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 + +; A double arg will pass on the stack in PPC32 if there is only one available GPR. +define void @call_test_stackarg_float2() { +entry: + %0 = load double, double* @d, align 8 + %1 = load float, float* @f, align 4 + call void (i32, i32, i32, i32, i32, i32, i32, ...) @test_stackarg_float2(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, double %0, float %1) + ret void +} + +declare void @test_stackarg_float2(i32, i32, i32, i32, i32, i32, i32, ...) + +; CHECK-LABEL: name: call_test_stackarg_float2{{.*}} + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; In 32-bit the double arg is written to memory because it cannot be fully stored in the last 32-bit GPR. 
+; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load 8 from @d) +; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $f2 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load 4 from @f) +; 32BIT-DAG: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-DAG: STFD renamable $f1, 52, $r1 :: (store 8) +; 32BIT-DAG: STFS renamable $f2, 60, $r1 :: (store 4) +; 32BIT-DAG: $r3 = LI 1 +; 32BIT-DAG: $r4 = LI 2 +; 32BIT-DAG: $r5 = LI 3 +; 32BIT-DAG: $r6 = LI 4 +; 32BIT-DAG: $r7 = LI 5 +; 32BIT-DAG: $r8 = LI 6 +; 32BIT-DAG: $r9 = LI 7 +; 32BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0) +; 32BIT-DAG: renamable $r10 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8) +; 32BIT-NEXT: BL_NOP <mcsymbol .test_stackarg_float2>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_stackarg_float2: + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; ASM32PWR4: stwu 1, -80(1) +; ASM32PWR4-DAG: lwz [[REGD:[0-9]+]], LC9(2) +; ASM32PWR4-DAG: lfd 1, 0([[REGD]]) +; ASM32PWR4-DAG: lwz [[REGF:[0-9]+]], LC8(2) +; ASM32PWR4-DAG: lfs 2, 0([[REGF]]) +; ASM32PWR4-DAG: stfd 1, 52(1) +; ASM32PWR4-DAG: stfs 2, 60(1) +; ASM32PWR4-DAG: li 3, 1 +; ASM32PWR4-DAG: li 4, 2 +; ASM32PWR4-DAG: li 5, 3 +; ASM32PWR4-DAG: li 6, 4 +; ASM32PWR4-DAG: li 7, 5 +; ASM32PWR4-DAG: li 8, 6 +; ASM32PWR4-DAG: li 9, 7 +; ASM32PWR4-DAG: stfd 1, 72(1) +; ASM32PWR4-DAG: lwz 10, 72(1) +; ASM32PWR4-NEXT: bl .test_stackarg_float2 +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; In 64-bit the double arg is not written to memory because it is fully stored in the last 64-bit GPR. +; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load 8 from @d) +; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $f2 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load 4 from @f) +; 64BIT-DAG: ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-DAG: STFS renamable $f2, 112, $x1 :: (store 4) +; 64BIT-DAG: $x3 = LI8 1 +; 64BIT-DAG: $x4 = LI8 2 +; 64BIT-DAG: $x5 = LI8 3 +; 64BIT-DAG: $x6 = LI8 4 +; 64BIT-DAG: $x7 = LI8 5 +; 64BIT-DAG: $x8 = LI8 6 +; 64BIT-DAG: $x9 = LI8 7 +; 64BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0) +; 64BIT-DAG: renamable $x10 = LD 0, %stack.0 :: (load 8 from %stack.0) +; 64BIT-NEXT: BL8_NOP <mcsymbol .test_stackarg_float2>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit $f1, implicit $x10, implicit $f2, implicit $x2, implicit-def $r1 + +; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 + +; The 
DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; ASM64PWR4: stdu 1, -128(1) +; ASM64PWR4-DAG: ld [[REGD:[0-9]+]], LC8(2) +; ASM64PWR4-DAG: lfd 1, 0([[REGD]]) +; ASM64PWR4-DAG: ld [[REGF:[0-9]+]], LC7(2) +; ASM64PWR4-DAG: lfs 2, 0([[REGF]]) +; ASM64PWR4-DAG: stfs 2, 112(1) +; ASM64PWR4-DAG: li 3, 1 +; ASM64PWR4-DAG: li 4, 2 +; ASM64PWR4-DAG: li 5, 3 +; ASM64PWR4-DAG: li 6, 4 +; ASM64PWR4-DAG: li 7, 5 +; ASM64PWR4-DAG: li 8, 6 +; ASM64PWR4-DAG: li 9, 7 +; ASM64PWR4-NEXT: bl .test_stackarg_float2 +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 Index: llvm/test/CodeGen/PowerPC/aix-stackargs.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-stackargs.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: not llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s -; RUN: not llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s - -define void @bar() { -entry: - call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret void -} - -declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32) - -; CHECK: LLVM ERROR: Handling of placing parameters on the stack is unimplemented!