Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6829,6 +6829,9 @@ if (ArgFlags.isNest()) report_fatal_error("Nest arguments are unimplemented."); + if (ValVT.isVector() || LocVT.isVector()) + report_fatal_error("Vector arguments are unimplemented on AIX."); + const PPCSubtarget &Subtarget = static_cast( State.getMachineFunction().getSubtarget()); const bool IsPPC64 = Subtarget.isPPC64(); @@ -6873,18 +6876,33 @@ // This includes f64 in 64-bit mode for ABI compatibility. State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4); if (unsigned Reg = State.AllocateReg(FPR)) - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::f64, LocInfo)); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else report_fatal_error("Handling of placing parameters on the stack is " "unimplemented!"); - // f32 reserves 1 GPR in both PPC32 and PPC64. - // f64 reserves 2 GPRs in PPC32 and 1 GPR in PPC64. - for (unsigned i = 0; i < StoreSize; i += PtrByteSize) - State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32); + // AIX requires that GPRs are reserved for float arguments. + // Successfully reserved GPRs are only initialized for vararg calls. + MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + for (unsigned I = 0; I < StoreSize; I += PtrByteSize) { + if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { + if (State.isVarArg()) { + // Custom handling is required for: + // f64 in PPC32 needs to be split into 2 GPRs. + // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR. 
+ State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); + } + } else if (State.isVarArg()) { + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + } + } + return false; } } + return true; } static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, @@ -7011,7 +7029,7 @@ CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && "Unexpected calling convention!"); - if (isVarArg || isPatchPoint) + if (isPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); const PPCSubtarget& Subtarget = @@ -7030,7 +7048,8 @@ // [SP][CR][LR][2 x reserved][TOC]. // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); - const unsigned PtrByteSize = Subtarget.isPPC64() ? 8 : 4; + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 8 : 4; CCInfo.AllocateStack(LinkageSize, PtrByteSize); CCInfo.AnalyzeCallOperands(Outs, CC_AIX); @@ -7050,26 +7069,70 @@ SmallVector, 8> RegsToPass; - for (CCValAssign &VA : ArgLocs) { + for (unsigned I = 0, E = ArgLocs.size(); I != E;) { + CCValAssign &VA = ArgLocs[I++]; + + if (VA.isMemLoc()) + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + if (!VA.isRegLoc()) + report_fatal_error( + "Unexpected non-register location for function call argument."); + SDValue Arg = OutVals[VA.getValNo()]; - switch (VA.getLocInfo()) { - default: report_fatal_error("Unexpected argument extension type."); - case CCValAssign::Full: break; - case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); - break; + if (!VA.needsCustom()) { + switch (VA.getLocInfo()) { + default: + report_fatal_error("Unexpected argument extension type."); + case CCValAssign::Full: + break; + case 
CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + + continue; } - if (VA.isRegLoc()) - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + // Custom handling is used for GPR initializations for vararg float + // arguments. + assert(isVarArg && VA.getValVT().isFloatingPoint() && + VA.getLocVT().isInteger() && + "Unexpected custom register handling for calling convention."); - if (VA.isMemLoc()) - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); + SDValue ArgAsInt = + DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg); + + if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize()) + // f32 in 32-bit GPR + // f64 in 64-bit GPR + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt)); + else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits()) + // f32 in 64-bit GPR. + RegsToPass.push_back(std::make_pair( + VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT()))); + else { + // f64 in two 32-bit GPRs + // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs. 
+ assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 && + "Unexpected custom register for argument!"); + CCValAssign &GPR1 = VA; + SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt, + DAG.getConstant(32, dl, MVT::i8)); + RegsToPass.push_back(std::make_pair( + GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32))); + assert(I != E && "A second custom GPR is expected!"); + CCValAssign &GPR2 = ArgLocs[I++]; + assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() && + GPR2.needsCustom() && "A second custom GPR is expected!"); + RegsToPass.push_back(std::make_pair( + GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32))); + } } // For indirect calls, we need to save the TOC base to the stack for Index: llvm/test/CodeGen/PowerPC/aix-cc-abi.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -1,9 +1,17 @@ ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ ; RUN: FileCheck --check-prefixes=CHECK,32BIT %s +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s + ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ ; RUN: FileCheck --check-prefixes=CHECK,64BIT %s +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s + define void @call_test_chars() { entry: call i8 @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) @@ -148,7 +156,6 @@ call void @test_i1(i1 1) ret void } - ; CHECK-LABEL: name: call_test_i1 ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 @@ -251,7 +258,6 @@ ret void } - ; CHECK-LABEL: name: call_test_i64 ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def 
dead $r1, implicit $r1 @@ -612,3 +618,263 @@ ; 64BIT: body: | ; 64BIT-NEXT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7 + +define void @call_test_vararg() { +entry: + %0 = load float, float* @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, double* @d1, align 8 + call void (i32, ...) @test_vararg(i32 42, double %conv, double %1) + ret void +} + +declare void @test_vararg(i32, ...) + +; CHECK-LABEL: name: call_test_vararg + +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) +; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) +; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r3 = LI 42 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit $f2, implicit $r6, implicit $r7, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_vararg: + +; ASM32PWR4: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz [[REG1:[0-9]+]], LC1(2) +; ASM32PWR4-NEXT: lfs 1, 0([[REG1]]) +; ASM32PWR4-NEXT: lwz [[REG2:[0-9]+]], LC2(2) +; ASM32PWR4-NEXT: stfd 
1, 64(1) +; ASM32PWR4-NEXT: lfd 2, 0([[REG2]]) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfd 2, 72(1) +; ASM32PWR4-DAG: lwz 4, 64(1) +; ASM32PWR4-DAG: lwz 5, 68(1) +; ASM32PWR4-DAG: lwz 6, 72(1) +; ASM32PWR4-DAG: lwz 7, 76(1) +; ASM32PWR4-NEXT: bl .test_vararg +; ASM32PWR4-NEXT: nop + +; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]]) +; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) +; 64BIT-NEXT: renamable $x5 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]]) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x3 = LI8 42 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $f2, implicit $x5, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld [[REG1:[0-9]+]], LC1(2) +; ASM64PWR4-NEXT: lfs 1, 0([[REG1]]) +; ASM64PWR4-NEXT: ld [[REG2:[0-9]+]], LC2(2) +; ASM64PWR4-NEXT: stfd 1, 112(1) +; ASM64PWR4-NEXT: lfd 2, 0([[REG2]]) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfd 2, 120(1) +; ASM64PWR4-NEXT: ld 4, 112(1) +; ASM64PWR4-NEXT: ld 5, 120(1) +; ASM64PWR4-NEXT: bl .test_vararg +; ASM64PWR4-NEXT: nop + +define void @call_test_vararg2() { +entry: + %0 = load float, float* @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, double* @d1, align 8 + call void (i32, ...) 
@test_vararg(i32 42, double %conv, i32 42, double %1) + ret void +} + +; CHECK-LABEL: name: call_test_vararg2 + +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) +; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) +; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r3 = LI 42 +; 32BIT-NEXT: $r6 = LI 42 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit $f2, implicit $r7, implicit $r8, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; ASM32PWR4: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz [[REG1:[0-9]+]], LC1(2) +; ASM32PWR4-NEXT: li 6, 42 +; ASM32PWR4-NEXT: lfs 1, 0([[REG1]]) +; ASM32PWR4-NEXT: lwz [[REG2:[0-9]+]], LC2(2) +; ASM32PWR4-NEXT: stfd 1, 64(1) +; ASM32PWR4-NEXT: lfd 2, 0([[REG2]]) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfd 2, 72(1) +; ASM32PWR4-DAG: lwz 4, 64(1) +; ASM32PWR4-DAG: lwz 5, 68(1) +; ASM32PWR4-DAG: lwz 7, 72(1) +; ASM32PWR4-DAG: lwz 8, 76(1) +; ASM32PWR4-NEXT: bl .test_vararg +; ASM32PWR4-NEXT: nop + +; 64BIT: renamable 
$x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]]) +; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) +; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]]) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x3 = LI8 42 +; 64BIT-NEXT: $x5 = LI8 42 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld [[REG1:[0-9]+]], LC1(2) +; ASM64PWR4-NEXT: li 5, 42 +; ASM64PWR4-NEXT: lfs 1, 0([[REG1]]) +; ASM64PWR4-NEXT: ld [[REG2:[0-9]+]], LC2(2) +; ASM64PWR4-NEXT: stfd 1, 112(1) +; ASM64PWR4-NEXT: lfd 2, 0([[REG2]]) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfd 2, 120(1) +; ASM64PWR4-NEXT: ld 4, 112(1) +; ASM64PWR4-NEXT: ld 6, 120(1) +; ASM64PWR4-NEXT: bl .test_vararg +; ASM64PWR4-NEXT: nop + +define void @call_test_vararg3() { +entry: + %0 = load float, float* @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, double* @d1, align 8 + call void (i32, ...) 
@test_vararg(i32 42, double %conv, i64 42, double %1) + ret void +} + +; CHECK-LABEL: name: call_test_vararg3 + +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) +; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) +; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r3 = LI 42 +; 32BIT-NEXT: $r6 = LI 0 +; 32BIT-NEXT: $r7 = LI 42 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit killed $r7, implicit $f2, implicit $r8, implicit $r9, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; ASM32PWR4: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz [[REG1:[0-9]+]], LC1(2) +; ASM32PWR4-DAG: li 6, 0 +; ASM32PWR4-DAG: li 7, 42 +; ASM32PWR4-NEXT: lfs 1, 0([[REG1]]) +; ASM32PWR4-NEXT: lwz [[REG2:[0-9]+]], LC2(2) +; ASM32PWR4-NEXT: stfd 1, 64(1) +; ASM32PWR4-NEXT: lfd 2, 0([[REG2]]) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfd 2, 72(1) +; ASM32PWR4-DAG: lwz 4, 64(1) +; ASM32PWR4-DAG: lwz 5, 68(1) +; ASM32PWR4-DAG: lwz 8, 72(1) +; ASM32PWR4-DAG: lwz 9, 76(1) +; 
ASM32PWR4-NEXT: bl .test_vararg +; ASM32PWR4-NEXT: nop + +; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]]) +; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) +; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]]) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x3 = LI8 42 +; 64BIT-NEXT: $x5 = LI8 42 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld [[REG1:[0-9]+]], LC1(2) +; ASM64PWR4-NEXT: li 5, 42 +; ASM64PWR4-NEXT: lfs 1, 0([[REG1]]) +; ASM64PWR4-NEXT: ld [[REG2:[0-9]+]], LC2(2) +; ASM64PWR4-NEXT: stfd 1, 112(1) +; ASM64PWR4-NEXT: lfd 2, 0([[REG2]]) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfd 2, 120(1) +; ASM64PWR4-DAG: ld 4, 112(1) +; ASM64PWR4-DAG: ld 6, 120(1) +; ASM64PWR4-NEXT: bl .test_vararg +; ASM64PWR4-NEXT: nop + +define void @call_test_vararg4() { +entry: + %0 = load float, float* @f1, align 4 + call void (i32, ...) 
@test_vararg(i32 42, float %0) + ret void +} + +; CHECK-LABEL: name: call_test_vararg4 + +; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]]) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]]) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r3 = LI 42 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]]) +; 64BIT-NEXT: renamable $x4 = LWZ8 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]]) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x3 = LI8 42 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM32PWR4: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2) +; ASM32PWR4-NEXT: lfs 1, 0([[REG]]) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfs 1, 60(1) +; ASM32PWR4-NEXT: lwz 4, 60(1) +; ASM32PWR4-NEXT: bl .test_vararg +; ASM32PWR4-NEXT: nop + +; ASM64PWR4: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2) +; ASM64PWR4-NEXT: lfs 1, 0([[REG]]) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfs 1, 124(1) +; ASM64PWR4-NEXT: lwz 4, 124(1) +; ASM64PWR4-NEXT: bl .test_vararg +; ASM64PWR4-NEXT: nop Index: 
llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll @@ -0,0 +1,22 @@ +; RUN: not llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s + +; This test expects a compiler diagnostic for an AIX limitation on Altivec +; support. When the Altivec limitation diagnostic is removed, llc will compile +; this file cleanly and the "not llc" RUN line will fail, alerting the author to +; validate the instructions emitted to initialize the GPR for the double vararg. +; The mfvsrwz and mfvsrd instructions should be used to initialize the GPR for +; the double vararg without going through memory. + +@f1 = global float 0.000000e+00, align 4 + +define void @call_test_vararg() { +entry: + %0 = load float, float* @f1, align 4 + %conv = fpext float %0 to double + call void (i32, ...) @test_vararg(i32 42, double %conv) + ret void +} + +declare void @test_vararg(i32, ...) + +; CHECK: LLVM ERROR: Altivec support is unimplemented on AIX.