Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6795,15 +6795,35 @@ // This includes f64 in 64-bit mode for ABI compatibility. State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4); if (unsigned Reg = State.AllocateReg(FPR)) - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::f64, LocInfo)); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else report_fatal_error("Handling of placing parameters on the stack is " "unimplemented!"); // f32 reserves 1 GPR in both PPC32 and PPC64. // f64 reserves 2 GPRs in PPC32 and 1 GPR in PPC64. - for (unsigned i = 0; i < StoreSize; i += PtrByteSize) - State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32); + // Successfully reserved GPRs are only initialized for vararg calls. + // Custom handling is required to split an f64 into 2 GPRs. + if (!IsPPC64 && LocVT.SimpleTy == MVT::f64) { + for (int i = 0; i < 2; ++i) + if (unsigned Reg = State.AllocateReg(GPR_32)) { + if (State.isVarArg()) + State.addLoc(CCValAssign::getCustomReg(ValNo, MVT::i32, Reg, + MVT::i32, LocInfo)); + } else if (State.isVarArg()) + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + return false; + } + MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + assert(RegVT.getStoreSize() >= StoreSize && + "GPR reserved for float arg is too small"); + if (unsigned Reg = State.AllocateReg(IsPPC64 ? 
GPR_64 : GPR_32)) { + if (State.isVarArg()) + State.addLoc(CCValAssign::getReg(ValNo, RegVT, Reg, RegVT, LocInfo)); + } else if (State.isVarArg()) + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); return false; } } @@ -6933,7 +6953,7 @@ CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && "Unexpected calling convention!"); - if (isVarArg || isPatchPoint) + if (isPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); if (!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) @@ -6955,7 +6975,8 @@ // [SP][CR][LR][2 x reserved][TOC]. // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); - const unsigned PtrByteSize = Subtarget.isPPC64() ? 8 : 4; + bool IsPPC64 = Subtarget.isPPC64(); + unsigned PtrByteSize = IsPPC64 ? 8 : 4; CCInfo.AllocateStack(LinkageSize, PtrByteSize); CCInfo.AnalyzeCallOperands(Outs, CC_AIX); @@ -6975,7 +6996,8 @@ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; - for (CCValAssign &VA : ArgLocs) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[VA.getValNo()]; switch (VA.getLocInfo()) { @@ -6989,8 +7011,31 @@ break; } - if (VA.isRegLoc()) - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (VA.isRegLoc()) { + if (!VA.needsCustom()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; + } + // An f64 vararg in PPC32 passes in both 1 FPR and 2 GPRs. + // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs. 
+ assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 && + "Unexpected custom register for argument"); + CCValAssign &GPR1 = VA; + SDValue ArgAsI64 = DAG.getBitcast(MVT::i64, Arg); + SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsI64, + DAG.getConstant(32, dl, MVT::i8)); + RegsToPass.push_back(std::make_pair( + GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32))); + // The second GPR may not be present if there were no GPRs remaining. + if (i + 1 == e) + continue; + CCValAssign &GPR2 = ArgLocs[++i]; + if (!GPR2.isRegLoc() || GPR2.getValNo() != GPR1.getValNo()) + continue; + assert(GPR2.needsCustom() && "A second custom GPR is expected"); + RegsToPass.push_back(std::make_pair( + GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsI64, dl, MVT::i32))); + } if (VA.isMemLoc()) report_fatal_error("Handling of placing parameters on the stack is " Index: llvm/test/CodeGen/PowerPC/aix-cc-abi.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -148,7 +148,6 @@ call void @test_i1(i1 1) ret void } - ; CHECK-LABEL: name: call_test_i1 ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 @@ -251,7 +250,6 @@ ret void } - ; CHECK-LABEL: name: call_test_i64 ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 @@ -612,3 +610,121 @@ ; 64BIT: body: | ; 64BIT-NEXT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7 + +define void @call_test_vararg() { +entry: + %0 = load float, float* @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, double* @d1, align 8 + call void (i32, ...) @test_vararg(i32 42, double %conv, double %1) + ret void +} + +declare void @test_vararg(i32, ...) 
+ +; CHECK-LABEL: name: call_test_vararg + +; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store 8 into %stack.1) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.1 :: (load 4 from %stack.1, align 8) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.1 :: (load 4 from %stack.1 + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store 8 into %stack.0) +; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8) +; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.0 :: (load 4 from %stack.0 + 4) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r3 = LI 42 +; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit $f2, implicit $r6, implicit $r7, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x4 = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x3 = LI8 42 +; 64BIT-NEXT: $x4 = COPY renamable $f1 +; 64BIT-NEXT: $x5 = COPY renamable $f2 +; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $f2, implicit killed $x5, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void 
@call_test_vararg2() { +entry: + %0 = load float, float* @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, double* @d1, align 8 + call void (i32, ...) @test_vararg(i32 42, double %conv, i32 42, double %1) + ret void +} + +; CHECK-LABEL: name: call_test_vararg2 + +; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store 8 into %stack.1) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.1 :: (load 4 from %stack.1, align 8) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.1 :: (load 4 from %stack.1 + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store 8 into %stack.0) +; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8) +; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.0 :: (load 4 from %stack.0 + 4) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r3 = LI 42 +; 32BIT-NEXT: $r6 = LI 42 +; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit $f2, implicit $r7, implicit $r8, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x4 = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x3 = LI8 42 +; 64BIT-NEXT: $x4 = COPY renamable $f1 +; 64BIT-NEXT: $x5 = LI8 42 +; 
64BIT-NEXT: $x6 = COPY renamable $f2 +; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @call_test_vararg3() { +entry: + %0 = load float, float* @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, double* @d1, align 8 + call void (i32, ...) @test_vararg(i32 42, double %conv, i64 42, double %1) + ret void +} + +; CHECK-LABEL: name: call_test_vararg3 + +; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store 8 into %stack.1) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.1 :: (load 4 from %stack.1, align 8) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.1 :: (load 4 from %stack.1 + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store 8 into %stack.0) +; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8) +; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.0 :: (load 4 from %stack.0 + 4) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r3 = LI 42 +; 32BIT-NEXT: $r6 = LI 0 +; 32BIT-NEXT: $r7 = LI 42 +; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit killed $r7, implicit $f2, implicit $r8, implicit $r9, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x4 = LDtoc @d1, $x2 :: (load 8 from 
got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x3 = LI8 42 +; 64BIT-NEXT: $x4 = COPY renamable $f1 +; 64BIT-NEXT: $x5 = LI8 42 +; 64BIT-NEXT: $x6 = COPY renamable $f2 +; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1