diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -369,7 +369,8 @@
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
-    // This is the ELF definition, and is overridden by the Darwin sub-target
+    // This is the ELF definition, and is overridden by the Darwin and AIX
+    // sub-targets
     return TargetInfo::PowerABIBuiltinVaList;
   }
 };
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -4229,7 +4229,8 @@
 // DefaultABIInfo::EmitVAArg.
 Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
                                       QualType Ty) const {
-  if (getTarget().getTriple().isOSDarwin()) {
+  if (getTarget().getTriple().isOSDarwin() ||
+      getTarget().getTriple().isOSAIX()) {
     auto TI = getContext().getTypeInfoInChars(Ty);
     TI.second = getParamTypeAlignment(Ty);
 
@@ -9917,7 +9918,9 @@
       new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft" ||
                                             getTarget().hasFeature("spe")));
   case llvm::Triple::ppc64:
-    if (Triple.isOSBinFormatELF()) {
+    // TODO: Add AIX ABI Info. Currently we are relying on
+    // PPC64/PPC32_SVR4_ABIInfo.
+    if (Triple.isOSBinFormatELF() || Triple.isOSAIX()) {
       PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
       if (getTarget().getABI() == "elfv2")
         Kind = PPC64_SVR4_ABIInfo::ELFv2;
diff --git a/clang/test/CodeGen/aix-vararg.c b/clang/test/CodeGen/aix-vararg.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aix-vararg.c
@@ -0,0 +1,52 @@
+// REQUIRES: powerpc-registered-target
+// REQUIRES: asserts
+// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -emit-llvm -o - %s | FileCheck %s --check-prefix=32BIT
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -emit-llvm -o - %s | FileCheck %s --check-prefix=64BIT
+#include <stdarg.h>
+
+void aix_varg(int a, ...) {
+  va_list arg;
+  va_start(arg, a);
+  va_arg(arg, int);
+  va_end(arg);
+}
+
+// 32BIT: define void @aix_varg(i32 %a, ...) #0 {
+// 32BIT: entry:
+// 32BIT-NEXT: %a.addr = alloca i32, align 4
+// 32BIT-NEXT: %arg = alloca i8*, align 4
+// 32BIT-NEXT: store i32 %a, i32* %a.addr, align 4
+// 32BIT-NEXT: %arg1 = bitcast i8** %arg to i8*
+// 32BIT-NEXT: call void @llvm.va_start(i8* %arg1)
+// 32BIT-NEXT: %argp.cur = load i8*, i8** %arg, align 4
+// 32BIT-NEXT: %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+// 32BIT-NEXT: store i8* %argp.next, i8** %arg, align 4
+// 32BIT-NEXT: %0 = bitcast i8* %argp.cur to i32*
+// 32BIT-NEXT: %1 = load i32, i32* %0, align 4
+// 32BIT-NEXT: %arg2 = bitcast i8** %arg to i8*
+// 32BIT-NEXT: call void @llvm.va_end(i8* %arg2)
+// 32BIT-NEXT: ret void
+// 32BIT-NEXT: }
+// 32BIT: declare void @llvm.va_start(i8*)
+// 32BIT: declare void @llvm.va_end(i8*)
+
+// 64BIT: define void @aix_varg(i32 signext %a, ...) #0 {
+// 64BIT: entry:
+// 64BIT-NEXT: %a.addr = alloca i32, align 4
+// 64BIT-NEXT: %arg = alloca i8*, align 8
+// 64BIT-NEXT: store i32 %a, i32* %a.addr, align 4
+// 64BIT-NEXT: %arg1 = bitcast i8** %arg to i8*
+// 64BIT-NEXT: call void @llvm.va_start(i8* %arg1)
+// 64BIT-NEXT: %argp.cur = load i8*, i8** %arg, align 8
+// 64BIT-NEXT: %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 8
+// 64BIT-NEXT: store i8* %argp.next, i8** %arg, align 8
+// 64BIT-NEXT: %0 = getelementptr inbounds i8, i8* %argp.cur, i64 4
+// 64BIT-NEXT: %1 = bitcast i8* %0 to i32*
+// 64BIT-NEXT: %2 = load i32, i32* %1, align 4
+// 64BIT-NEXT: %arg2 = bitcast i8** %arg to i8*
+// 64BIT-NEXT: call void @llvm.va_end(i8* %arg2)
+// 64BIT-NEXT: ret void
+// 64BIT-NEXT: }
+// 64BIT: declare void @llvm.va_start(i8*)
+// 64BIT: declare void @llvm.va_end(i8*)
+
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3239,7 +3239,7 @@
 
   SDLoc dl(Op);
 
-  if (Subtarget.isPPC64()) {
+  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
     // vastart just stores the address of the VarArgsFrameIndex slot into the
     // memory location argument.
     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -6968,9 +6968,6 @@
           CallConv == CallingConv::Fast) &&
          "Unexpected calling convention!");
 
-  if (isVarArg)
-    report_fatal_error("This call type is unimplemented on AIX.");
-
   if (getTargetMachine().Options.GuaranteedTailCallOpt)
     report_fatal_error("Tail call support is unimplemented on AIX.");
 
@@ -6988,6 +6985,7 @@
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
 
   const EVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -7014,7 +7012,7 @@
     if (VA.isMemLoc() && VA.needsCustom())
       continue;
 
-    if (VA.isRegLoc()) {
+    if (VA.isRegLoc() && !VA.needsCustom()) {
       MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
       unsigned VReg =
           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
@@ -7026,42 +7024,87 @@
       }
       InVals.push_back(ArgValue);
       continue;
+    } else if (VA.isMemLoc()) {
+      const unsigned LocSize = LocVT.getStoreSize();
+      const unsigned ValSize = ValVT.getStoreSize();
+      assert((ValSize <= LocSize) &&
+             "Object size is larger than size of MemLoc");
+      int CurArgOffset = VA.getLocMemOffset();
+      // Objects are right-justified because AIX is big-endian.
+      if (LocSize > ValSize)
+        CurArgOffset += LocSize - ValSize;
+      // Potential tail calls could cause overwriting of argument stack slots.
+      const bool IsImmutable =
+          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+            (CallConv == CallingConv::Fast));
+      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      SDValue ArgValue =
+          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
+      InVals.push_back(ArgValue);
+    }
+  }
+
+  // On AIX a minimum of 8 words is saved to the parameter save area.
+  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
+  // Area that is at least reserved in the caller of this function.
+  unsigned CallerReservedArea = std::max(CCInfo.getNextStackOffset(),
+                                         LinkageSize + MinParameterSaveArea);
+
+  // Set the size that is at least reserved in caller of this function. Tail
+  // call optimized function's reserved stack space needs to be aligned so
+  // that taking the difference between two stack areas will result in an
+  // aligned stack.
+  CallerReservedArea =
+      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  FuncInfo->setMinReservedArea(CallerReservedArea);
+
+  SmallVector<SDValue, 8> MemOps;
+
+  if (isVarArg) {
+
+    const static MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};
+
+    const static MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+
+    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
+
+    FuncInfo->setVarArgsNumGPR(
+        CCInfo.getFirstUnallocated(IsPPC64 ? GPR_64 : GPR_32));
+    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(
+        PtrByteSize, CCInfo.getNextStackOffset(), true));
+    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+    // The fixed integer arguments of a variadic function are stored to the
+    // VarArgsFrameIndex on the stack so that they may be loaded by
+    // dereferencing the result of va_next.
+    for (unsigned GPRIndex =
+             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
+         GPRIndex < NumGPArgRegs; ++GPRIndex) {
+      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(
+          IsPPC64 ? GPR_64[GPRIndex] : GPR_32[GPRIndex]);
+      if (!VReg) {
+        if (IsPPC64)
+          VReg = MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass);
+        else
+          VReg = MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
+      }
+      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+      SDValue Store =
+          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
+      MemOps.push_back(Store);
+      // Increment the address for the next argument to store.
+      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
+      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+    }
+  }
-    const unsigned LocSize = LocVT.getStoreSize();
-    const unsigned ValSize = ValVT.getStoreSize();
-    assert((ValSize <= LocSize) && "Object size is larger than size of MemLoc");
-    int CurArgOffset = VA.getLocMemOffset();
-    // Objects are right-justified because AIX is big-endian.
-    if (LocSize > ValSize)
-      CurArgOffset += LocSize - ValSize;
-    MachineFrameInfo &MFI = MF.getFrameInfo();
-    // Potential tail calls could cause overwriting of argument stack slots.
-    const bool IsImmutable =
-        !(getTargetMachine().Options.GuaranteedTailCallOpt &&
-          (CallConv == CallingConv::Fast));
-    int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
-    SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-    SDValue ArgValue = DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
-    InVals.push_back(ArgValue);
-  }
-
-  // On AIX a minimum of 8 words is saved to the parameter save area.
-  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
-  // Area that is at least reserved in the caller of this function.
-  unsigned CallerReservedArea =
-      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
-
-  // Set the size that is at least reserved in caller of this function. Tail
-  // call optimized function's reserved stack space needs to be aligned so
-  // that taking the difference between two stack areas will result in an
-  // aligned stack.
-  CallerReservedArea =
-      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
-  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
-  FuncInfo->setMinReservedArea(CallerReservedArea);
+  if (!MemOps.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
 
-  return Chain;
+  return Chain;
 }
 
 SDValue PPCTargetLowering::LowerCall_AIX(
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-va_args-32.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-va_args-32.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-va_args-32.ll
@@ -0,0 +1,277 @@
+; RUN: llc -O2 -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,32BIT %s
+
+; RUN: llc -O2 -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN:   -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #2
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+@a = local_unnamed_addr global i32 1, align 4
+@b = local_unnamed_addr global i32 2, align 4
+@c = local_unnamed_addr global i32 3, align 4
+@d = local_unnamed_addr global i32 4, align 4
+@e = local_unnamed_addr global i32 5, align 4
+@f = local_unnamed_addr global i32 6, align 4
+@g = local_unnamed_addr global i32 7, align 4
+@h = local_unnamed_addr global i32 8, align 4
+@i = local_unnamed_addr global i32 9, align 4
+@j = local_unnamed_addr global i32 10, align 4
+
+; Function Attrs: nounwind
+define i32 @va_arg1(i32 %a, ...) local_unnamed_addr #0 {
+entry:
+  %arg = alloca i8*, align 4
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #2
+  call void @llvm.va_start(i8* nonnull %0)
+  %cmp7 = icmp sgt i32 %a, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %argp.cur.pre = load i8*, i8** %arg, align 4
+  %min.iters.check = icmp eq i32 %a, 1
+  br i1 %min.iters.check, label %for.body.preheader15, label %vector.memcheck
+
+vector.memcheck:                                  ; preds = %for.body.preheader
+  %uglygep = getelementptr inbounds i8, i8* %0, i32 1
+  %1 = shl i32 %a, 2
+  %scevgep = getelementptr i8, i8* %argp.cur.pre, i32 %1
+  %bound0 = icmp ugt i8* %scevgep, %0
+  %bound1 = icmp ult i8* %argp.cur.pre, %uglygep
+  %found.conflict = and i1 %bound0, %bound1
+  br i1 %found.conflict, label %for.body.preheader15, label %vector.ph
+
+vector.ph:                                        ; preds = %vector.memcheck
+  %n.vec = and i32 %a, -2
+  %2 = shl i32 %n.vec, 2
+  %ind.end = getelementptr i8, i8* %argp.cur.pre, i32 %2
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.phi = phi i32 [ undef, %vector.ph ], [ %11, %vector.body ]
+  %vec.phi13 = phi i32 [ 0, %vector.ph ], [ %12, %vector.body ]
+  %3 = shl i32 %index, 2
+  %next.gep = getelementptr i8, i8* %argp.cur.pre, i32 %3
+  %4 = shl i32 %index, 2
+  %5 = or i32 %4, 4
+  %next.gep12 = getelementptr i8, i8* %argp.cur.pre, i32 %5
+  %6 = getelementptr inbounds i8, i8* %next.gep12, i32 4
+  %7 = bitcast i8* %next.gep to i32*
+  %8 = bitcast i8* %next.gep12 to i32*
+  %9 = load i32, i32* %7, align 4
+  %10 = load i32, i32* %8, align 4
+  %11 = add i32 %9, %vec.phi
+  %12 = add i32 %10, %vec.phi13
+  %index.next = add i32 %index, 2
+  %13 = icmp eq i32 %index.next, %n.vec
+  br i1 %13, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  store i8* %6, i8** %arg, align 4
+  %bin.rdx = add i32 %12, %11
+  %cmp.n = icmp eq i32 %n.vec, %a
+  br i1 %cmp.n, label %for.end, label %for.body.preheader15
+
+for.body.preheader15:                             ; preds = %middle.block, %vector.memcheck, %for.body.preheader
+  %argp.cur.ph = phi i8* [ %argp.cur.pre, %vector.memcheck ], [ %argp.cur.pre, %for.body.preheader ], [ %ind.end, %middle.block ]
+  %total.09.ph = phi i32 [ undef, %vector.memcheck ], [ undef, %for.body.preheader ], [ %bin.rdx, %middle.block ]
+  %i.08.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader15, %for.body
+  %argp.cur = phi i8* [ %argp.next, %for.body ], [ %argp.cur.ph, %for.body.preheader15 ]
+  %total.09 = phi i32 [ %add, %for.body ], [ %total.09.ph, %for.body.preheader15 ]
+  %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader15 ]
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+  store i8* %argp.next, i8** %arg, align 4
+  %14 = bitcast i8* %argp.cur to i32*
+  %15 = load i32, i32* %14, align 4
+  %add = add nsw i32 %15, %total.09
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, %a
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %middle.block, %entry
+  %total.0.lcssa = phi i32 [ undef, %entry ], [ %bin.rdx, %middle.block ], [ %add, %for.body ]
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #2
+  ret i32 %total.0.lcssa
+}
+
+
+; 32BIT-LABEL: name: va_arg1
+; 32BIT-LABEL: liveins:
+; 32BIT-DAG: - { reg: '$r3', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r4', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r5', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r6', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r7', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r8', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r9', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r10', virtual-reg: '' }
+; 32BIT-LABEL: fixedStack:
+; 32BIT-DAG: - { id: 0, type: default, offset: 28, size: 4
+; 32BIT-LABEL: body: |
+; 32BIT-LABEL: bb.0.entry:
+; 32BIT-DAG: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
+; 32BIT-DAG: renamable $cr0 = CMPWI renamable $r3, 1
+; 32BIT-DAG: STW killed renamable $r4, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0)
+; 32BIT-DAG: STW killed renamable $r5, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4)
+; 32BIT-DAG: STW killed renamable $r6, 8, %fixed-stack.0 :: (store 4)
+; 32BIT-DAG: STW killed renamable $r7, 12, %fixed-stack.0 :: (store 4)
+; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store 4)
+; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store 4)
+; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store 4)
+; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = ADDI %fixed-stack.0, 0
+; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 0, %stack.0.arg :: (store 4 into %ir.0)
+
+; ASM32PWR4-LABEL: .va_arg1:
+; ASM32PWR4-DAG: cmpwi 3, 1
+; ASM32PWR4-DAG: stw 4, 28(1)
+; ASM32PWR4-DAG: stw 5, 32(1)
+; ASM32PWR4-DAG: stw 6, 36(1)
+; ASM32PWR4-DAG: stw 7, 40(1)
+; ASM32PWR4-DAG: stw 8, 44(1)
+; ASM32PWR4-DAG: stw 9, 48(1)
+; ASM32PWR4-DAG: stw 10, 52(1)
+; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], -4(1)
+; ASM32PWR4-DAG: addi [[SCRATCHREG:[0-9]+]], 1, 28
+; ASM32PWR4-DAG: blt 0, LBB0_8
+
+
+define i32 @va_arg2(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight, ...) local_unnamed_addr #0 {
+entry:
+  %arg = alloca i8*, align 4
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #2
+  call void @llvm.va_start(i8* nonnull %0)
+  %add = add nsw i32 %two, %one
+  %add2 = add nsw i32 %add, %three
+  %add3 = add nsw i32 %add2, %four
+  %add4 = add nsw i32 %add3, %five
+  %add5 = add nsw i32 %add4, %six
+  %add6 = add nsw i32 %add5, %seven
+  %add7 = add nsw i32 %add6, %eight
+  %cmp15 = icmp sgt i32 %eight, 0
+  br i1 %cmp15, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %argp.cur.pre = load i8*, i8** %arg, align 4
+  %min.iters.check = icmp eq i32 %eight, 1
+  br i1 %min.iters.check, label %for.body.preheader23, label %vector.memcheck
+
+vector.memcheck:                                  ; preds = %for.body.preheader
+  %uglygep = getelementptr inbounds i8, i8* %0, i32 1
+  %1 = shl i32 %eight, 2
+  %scevgep = getelementptr i8, i8* %argp.cur.pre, i32 %1
+  %bound0 = icmp ugt i8* %scevgep, %0
+  %bound1 = icmp ult i8* %argp.cur.pre, %uglygep
+  %found.conflict = and i1 %bound0, %bound1
+  br i1 %found.conflict, label %for.body.preheader23, label %vector.ph
+
+vector.ph:                                        ; preds = %vector.memcheck
+  %n.vec = and i32 %eight, -2
+  %2 = shl i32 %n.vec, 2
+  %ind.end = getelementptr i8, i8* %argp.cur.pre, i32 %2
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.phi = phi i32 [ %add7, %vector.ph ], [ %11, %vector.body ]
+  %vec.phi21 = phi i32 [ 0, %vector.ph ], [ %12, %vector.body ]
+  %3 = shl i32 %index, 2
+  %next.gep = getelementptr i8, i8* %argp.cur.pre, i32 %3
+  %4 = shl i32 %index, 2
+  %5 = or i32 %4, 4
+  %next.gep20 = getelementptr i8, i8* %argp.cur.pre, i32 %5
+  %6 = getelementptr inbounds i8, i8* %next.gep20, i32 4
+  %7 = bitcast i8* %next.gep to i32*
+  %8 = bitcast i8* %next.gep20 to i32*
+  %9 = load i32, i32* %7, align 4
+  %10 = load i32, i32* %8, align 4
+  %11 = add i32 %9, %vec.phi
+  %12 = add i32 %10, %vec.phi21
+  %index.next = add i32 %index, 2
+  %13 = icmp eq i32 %index.next, %n.vec
+  br i1 %13, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  store i8* %6, i8** %arg, align 4
+  %bin.rdx = add i32 %12, %11
+  %cmp.n = icmp eq i32 %n.vec, %eight
+  br i1 %cmp.n, label %for.end, label %for.body.preheader23
+
+for.body.preheader23:                             ; preds = %middle.block, %vector.memcheck, %for.body.preheader
+  %argp.cur.ph = phi i8* [ %argp.cur.pre, %vector.memcheck ], [ %argp.cur.pre, %for.body.preheader ], [ %ind.end, %middle.block ]
+  %total.017.ph = phi i32 [ %add7, %vector.memcheck ], [ %add7, %for.body.preheader ], [ %bin.rdx, %middle.block ]
+  %i.016.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader23, %for.body
+  %argp.cur = phi i8* [ %argp.next, %for.body ], [ %argp.cur.ph, %for.body.preheader23 ]
+  %total.017 = phi i32 [ %add8, %for.body ], [ %total.017.ph, %for.body.preheader23 ]
+  %i.016 = phi i32 [ %inc, %for.body ], [ %i.016.ph, %for.body.preheader23 ]
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+  store i8* %argp.next, i8** %arg, align 4
+  %14 = bitcast i8* %argp.cur to i32*
+  %15 = load i32, i32* %14, align 4
+  %add8 = add nsw i32 %15, %total.017
+  %inc = add nuw nsw i32 %i.016, 1
+  %exitcond = icmp eq i32 %inc, %eight
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %middle.block, %entry
+  %total.0.lcssa = phi i32 [ %add7, %entry ], [ %bin.rdx, %middle.block ], [ %add8, %for.body ]
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #2
+  ret i32 %total.0.lcssa
+}
+
+; 32BIT-LABEL: name: va_arg2
+; 32BIT-LABEL: liveins:
+; 32BIT-DAG: - { reg: '$r3', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r4', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r5', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r6', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r7', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r8', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r9', virtual-reg: '' }
+; 32BIT-DAG: - { reg: '$r10', virtual-reg: '' }
+; 32BIT-LABEL: fixedStack:
+; 32BIT-DAG: - { id: 0, type: default, offset: 56, size: 4
+; 32BIT-LABEL: body: |
+; 32BIT-LABEL: bb.0.entry:
+; 32BIT-DAG: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
+; 32BIT-DAG: STW killed renamable $r11, 0, %stack.0.arg :: (store 4 into %ir.0)
+; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3
+; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5
+; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6
+; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r7
+; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r8
+; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r9
+; 32BIT-DAG: renamable $cr0 = CMPWI renamable $r10, 1
+; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r10
+; 32BIT-DAG: renamable $r11 = ADDI %fixed-stack.0, 0
+
+; ASM32PWR4-LABEL: .va_arg2:
+; ASM32PWR4-DAG: add 3, 4, 3
+; ASM32PWR4-DAG: add 3, 3, 5
+; ASM32PWR4-DAG: add 3, 3, 6
+; ASM32PWR4-DAG: add 3, 3, 7
+; ASM32PWR4-DAG: add 3, 3, 8
+; ASM32PWR4-DAG: add 3, 3, 9
+; ASM32PWR4-DAG: add 3, 3, 10
+; ASM32PWR4-DAG: cmpwi 10, 1
+; ASM32PWR4-DAG: addi [[SCRATCHREG:[0-9]+]], 1, 56
+; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], -4(1)
+
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-va_args-64.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-va_args-64.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-va_args-64.ll
@@ -0,0 +1,316 @@
+; RUN: llc -O2 -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefixes=CHECK,64BIT %s
+
+; RUN: llc -O2 -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s
+
+@a = local_unnamed_addr global i32 1, align 4
+@b = local_unnamed_addr global i32 2, align 4
+@c = local_unnamed_addr global i32 3, align 4
+@d = local_unnamed_addr global i32 4, align 4
+@e = local_unnamed_addr global i32 5, align 4
+@f = local_unnamed_addr global i32 6, align 4
+@g = local_unnamed_addr global i32 7, align 4
+@h = local_unnamed_addr global i32 8, align 4
+@i = local_unnamed_addr global i32 9, align 4
+@j = local_unnamed_addr global i32 10, align 4
+
+define signext i32 @va_arg1(i32 signext %a, ...) local_unnamed_addr #0 {
+entry:
+  %arg = alloca i8*, align 8
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #2
+  call void @llvm.va_start(i8* nonnull %0)
+  %cmp7 = icmp sgt i32 %a, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %argp.cur.pre = load i8*, i8** %arg, align 8
+  %1 = add i32 %a, -1
+  %2 = zext i32 %1 to i64
+  %3 = add nuw nsw i64 %2, 1
+  %min.iters.check = icmp ult i32 %1, 8
+  br i1 %min.iters.check, label %for.body.preheader28, label %vector.memcheck
+
+vector.memcheck:                                  ; preds = %for.body.preheader
+  %uglygep = getelementptr inbounds i8, i8* %0, i64 1
+  %scevgep = getelementptr i8, i8* %argp.cur.pre, i64 4
+  %4 = shl nuw nsw i64 %2, 3
+  %5 = add nuw nsw i64 %4, 8
+  %scevgep11 = getelementptr i8, i8* %argp.cur.pre, i64 %5
+  %bound0 = icmp ugt i8* %scevgep11, %0
+  %bound1 = icmp ult i8* %scevgep, %uglygep
+  %found.conflict = and i1 %bound0, %bound1
+  br i1 %found.conflict, label %for.body.preheader28, label %vector.ph
+
+vector.ph:                                        ; preds = %vector.memcheck
+  %n.mod.vf = and i64 %3, 7
+  %6 = icmp eq i64 %n.mod.vf, 0
+  %7 = select i1 %6, i64 8, i64 %n.mod.vf
+  %n.vec = sub nsw i64 %3, %7
+  %8 = shl nsw i64 %n.vec, 3
+  %ind.end = getelementptr i8, i8* %argp.cur.pre, i64 %8
+  %ind.end13 = trunc i64 %n.vec to i32
+  %next.gep = getelementptr i8, i8* %argp.cur.pre, i64 4
+  %next.gep17 = getelementptr i8, i8* %argp.cur.pre, i64 4
+  %next.gep20 = getelementptr i8, i8* %argp.cur.pre, i64 8
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.phi = phi <4 x i32> [ <i32 undef, i32 0, i32 0, i32 0>, %vector.ph ], [ %19, %vector.body ]
+  %vec.phi21 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %20, %vector.body ]
+  %9 = shl i64 %index, 3
+  %10 = shl i64 %index, 3
+  %11 = or i64 %10, 32
+  %12 = shl i64 %index, 3
+  %13 = or i64 %12, 56
+  %14 = getelementptr inbounds i8, i8* %next.gep20, i64 %13
+  %15 = getelementptr inbounds i8, i8* %next.gep, i64 %9
+  %16 = getelementptr inbounds i8, i8* %next.gep17, i64 %11
+  %17 = bitcast i8* %15 to <8 x i32>*
+  %18 = bitcast i8* %16 to <8 x i32>*
+  %wide.vec = load <8 x i32>, <8 x i32>* %17, align 4
+  %wide.vec23 = load <8 x i32>, <8 x i32>* %18, align 4
+  %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %strided.vec24 = shufflevector <8 x i32> %wide.vec23, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %19 = add <4 x i32> %strided.vec, %vec.phi
+  %20 = add <4 x i32> %strided.vec24, %vec.phi21
+  %index.next = add i64 %index, 8
+  %21 = icmp eq i64 %index.next, %n.vec
+  br i1 %21, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  store i8* %14, i8** %arg, align 8
+  %bin.rdx = add <4 x i32> %20, %19
+  %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx25 = add <4 x i32> %bin.rdx, %rdx.shuf
+  %rdx.shuf26 = shufflevector <4 x i32> %bin.rdx25, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx27 = add <4 x i32> %bin.rdx25, %rdx.shuf26
+  %22 = extractelement <4 x i32> %bin.rdx27, i32 0
+  br label %for.body.preheader28
+
+for.body.preheader28:                             ; preds = %middle.block, %vector.memcheck, %for.body.preheader
+  %argp.cur.ph = phi i8* [ %argp.cur.pre, %vector.memcheck ], [ %argp.cur.pre, %for.body.preheader ], [ %ind.end, %middle.block ]
+  %total.09.ph = phi i32 [ undef, %vector.memcheck ], [ undef, %for.body.preheader ], [ %22, %middle.block ]
+  %i.08.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end13, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader28, %for.body
+  %argp.cur = phi i8* [ %argp.next, %for.body ], [ %argp.cur.ph, %for.body.preheader28 ]
+  %total.09 = phi i32 [ %add, %for.body ], [ %total.09.ph, %for.body.preheader28 ]
+  %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader28 ]
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 8
+  store i8* %argp.next, i8** %arg, align 8
+  %23 = getelementptr inbounds i8, i8* %argp.cur, i64 4
+  %24 = bitcast i8* %23 to i32*
+  %25 = load i32, i32* %24, align 4
+  %add = add nsw i32 %25, %total.09
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, %a
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %total.0.lcssa = phi i32 [ undef, %entry ], [ %add, %for.body ]
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #2
+  ret i32 %total.0.lcssa
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #2
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; 64BIT-LABEL: name: va_arg1
+; 64BIT-LABEL: liveins:
+; 64BIT-DAG: - { reg: '$x3', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x4', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x5', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x6', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x9', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' }
+; 64BIT-LABEL: fixedStack:
+; 64BIT-DAG: - { id: 0, type: default, offset: 56, size: 8
+; 64BIT-LABEL: body: |
+; 64BIT-LABEL: bb.0.entry:
+; 64BIT-DAG: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
+; 64BIT-DAG: renamable $cr0 = CMPWI renamable $r3, 1
+; 64BIT-DAG: STD killed renamable $x4, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0)
+; 64BIT-DAG: STD killed renamable $x5, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8)
+; 64BIT-DAG: STD killed renamable $x6, 16, %fixed-stack.0 :: (store 8)
+; 64BIT-DAG: STD killed renamable $x7, 24, %fixed-stack.0 :: (store 8)
+; 64BIT-DAG: STD killed renamable $x8, 32, %fixed-stack.0 :: (store 8)
+; 64BIT-DAG: STD killed renamable $x9, 40, %fixed-stack.0 :: (store 8)
+; 64BIT-DAG: STD killed renamable $x10, 48, %fixed-stack.0 :: (store 8)
+; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = ADDI8 %fixed-stack.0, 0
+; 64BIT-DAG: STD killed renamable $x[[SCRATCHREG:[0-9]+]], 0, %stack.0.arg :: (store 8 into %ir.0)
+
+; ASM64PWR4-LABEL: .va_arg1:
+; ASM64PWR4-DAG: cmpwi 3, 1
+; ASM64PWR4-DAG: std 4, 56(1)
+; ASM64PWR4-DAG: std 5, 64(1)
+; ASM64PWR4-DAG: std 6, 72(1)
+; ASM64PWR4-DAG: std 7, 80(1)
+; ASM64PWR4-DAG: std 8, 88(1)
+; ASM64PWR4-DAG: std 9, 96(1)
+; ASM64PWR4-DAG: std 10, 104(1)
+; ASM64PWR4-DAG: addi [[SCRATCHREG:[0-9]+]], 1, 56
+
+define signext i32 @va_arg2(i32 signext %one, i32 signext %two, i32 signext %three, i32 signext %four, i32 signext %five, i32 signext %six, i32 signext %seven, i32 signext %eight, ...) local_unnamed_addr #0 {
+entry:
+  %arg = alloca i8*, align 8
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #2
+  call void @llvm.va_start(i8* nonnull %0)
+  %add = add nsw i32 %two, %one
+  %add2 = add nsw i32 %add, %three
+  %add3 = add nsw i32 %add2, %four
+  %add4 = add nsw i32 %add3, %five
+  %add5 = add nsw i32 %add4, %six
+  %add6 = add nsw i32 %add5, %seven
+  %add7 = add nsw i32 %add6, %eight
+  %cmp15 = icmp sgt i32 %eight, 0
+  br i1 %cmp15, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %argp.cur.pre = load i8*, i8** %arg, align 8
+  %1 = add i32 %eight, -1
+  %2 = zext i32 %1 to i64
+  %3 = add nuw nsw i64 %2, 1
+  %min.iters.check = icmp ult i32 %1, 8
+  br i1 %min.iters.check, label %for.body.preheader36, label %vector.memcheck
+
+vector.memcheck:                                  ; preds = %for.body.preheader
+  %uglygep = getelementptr inbounds i8, i8* %0, i64 1
+  %scevgep = getelementptr i8, i8* %argp.cur.pre, i64 4
+  %4 = shl nuw nsw i64 %2, 3
+  %5 = add nuw nsw i64 %4, 8
+  %scevgep19 = getelementptr i8, i8* %argp.cur.pre, i64 %5
+  %bound0 = icmp ugt i8* %scevgep19, %0
+  %bound1 = icmp ult i8* %scevgep, %uglygep
+  %found.conflict = and i1 %bound0, %bound1
+  br i1 %found.conflict, label %for.body.preheader36, label %vector.ph
+
+vector.ph:                                        ; preds = %vector.memcheck
+  %n.mod.vf = and i64 %3, 7
+  %6 = icmp eq i64 %n.mod.vf, 0
+  %7 = select i1 %6, i64 8, i64 %n.mod.vf
+  %n.vec = sub nsw i64 %3, %7
+  %8 = shl nsw i64 %n.vec, 3
+  %ind.end = getelementptr i8, i8* %argp.cur.pre, i64 %8
+  %ind.end21 = trunc i64 %n.vec to i32
+  %9 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %add7, i32 0
+  %next.gep = getelementptr i8, i8* %argp.cur.pre, i64 4
+  %next.gep25 = getelementptr i8, i8* %argp.cur.pre, i64 4
+  %next.gep28 = getelementptr i8, i8* %argp.cur.pre, i64 8
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.phi = phi <4 x i32> [ %9, %vector.ph ], [ %20, %vector.body ]
+  %vec.phi29 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %21, %vector.body ]
+  %10 = shl i64 %index, 3
+  %11 = shl i64 %index, 3
+  %12 = or i64 %11, 32
+  %13 = shl i64 %index, 3
+  %14 = or i64 %13, 56
+  %15 = getelementptr inbounds i8, i8* %next.gep28, i64 %14
+  %16 = getelementptr inbounds i8, i8* %next.gep, i64 %10
+  %17 = getelementptr inbounds i8, i8* %next.gep25, i64 %12
+  %18 = bitcast i8* %16 to <8 x i32>*
+  %19 = bitcast i8* %17 to <8 x i32>*
+  %wide.vec = load <8 x i32>, <8 x i32>* %18, align 4
+  %wide.vec31 = load <8 x i32>, <8 x i32>* %19, align 4
+  %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %strided.vec32 = shufflevector <8 x i32> %wide.vec31, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %20 = add <4 x i32> %strided.vec, %vec.phi
+  %21 = add <4 x i32> %strided.vec32, %vec.phi29
+  %index.next = add i64 %index, 8
+  %22 = icmp eq i64 %index.next, %n.vec
+  br i1 %22, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  store i8* %15, i8** %arg, align 8
+  %bin.rdx = add <4 x i32> %21, %20
+  %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx33 = add <4 x i32> %bin.rdx, %rdx.shuf
+  %rdx.shuf34 = shufflevector <4 x i32> %bin.rdx33, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx35 = add <4 x i32> %bin.rdx33, %rdx.shuf34
+  %23 = extractelement <4 x i32> %bin.rdx35, i32 0
+  br label %for.body.preheader36
+
+for.body.preheader36:                             ; preds = %middle.block, %vector.memcheck, %for.body.preheader
+  %argp.cur.ph = phi i8* [ %argp.cur.pre, %vector.memcheck ], [ %argp.cur.pre, %for.body.preheader ], [ %ind.end, %middle.block ]
+  %total.017.ph = phi i32 [ %add7, %vector.memcheck ], [ %add7, %for.body.preheader ], [ %23, %middle.block ]
+  %i.016.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end21, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader36, %for.body
+  %argp.cur = phi i8* [ %argp.next, %for.body ], [ %argp.cur.ph, %for.body.preheader36 ]
+  %total.017 = phi i32 [ %add8, %for.body ], [ %total.017.ph, %for.body.preheader36 ]
+  %i.016 = phi i32 [ %inc, %for.body ], [ %i.016.ph, %for.body.preheader36 ]
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 8
+  store i8* %argp.next, i8** %arg, align 8
+  %24 = getelementptr inbounds i8, i8* %argp.cur, i64 4
+  %25 = bitcast i8* %24 to i32*
+  %26 = load i32, i32* %25, align 4
+  %add8 = add nsw i32 %26, %total.017
+  %inc = add nuw nsw i32 %i.016, 1
+  %exitcond = icmp eq i32 %inc, %eight
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %total.0.lcssa = phi i32 [ %add7, %entry ], [ %add8, %for.body ]
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #2
+  ret i32 %total.0.lcssa
+}
+
+; 64BIT-LABEL: name: va_arg2
+; 64BIT-LABEL: liveins:
+; 64BIT-DAG: - { reg: '$x3', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x4', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x5', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x6', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x9', virtual-reg: '' }
+; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' }
+; 64BIT-LABEL: fixedStack:
+; 64BIT-DAG: - { id: 0, type: default, offset: 112, size: 8
+; 64BIT-LABEL: body: |
+; 64BIT-LABEL: bb.0.entry:
+; 64BIT-DAG: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
+; 64BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4
+; 64BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCHREG:[0-9]+]], renamable $r5, implicit killed $x5
+; 64BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCHREG:[0-9]+]], renamable $r6, implicit killed $x6
+; 64BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCHREG:[0-9]+]], renamable $r7, implicit killed $x7
+; 64BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCHREG:[0-9]+]], renamable $r8, implicit killed $x8
+; 64BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCHREG:[0-9]+]], renamable $r9, implicit killed $x9
+; 64BIT-DAG: renamable $cr0 = CMPWI renamable $r10, 1
+; 64BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCHREG:[0-9]+]], renamable $r10
+; 64BIT-DAG: STD killed renamable $x[[SCRATCHREG:[0-9]+]], 0, %stack.0.arg :: (store 8 into %ir.0)
+; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = ADDI8 %fixed-stack.0, 0
+
+; ASM64PWR4-LABEL: .va_arg2:
+; ASM64PWR4-DAG: add 3, 4, 3
+; ASM64PWR4-DAG: add 3, 3, 5
+; ASM64PWR4-DAG: add 3, 3, 6
+; ASM64PWR4-DAG: add 3, 3, 7
+; ASM64PWR4-DAG: add 3, 3, 8
+; ASM64PWR4-DAG: add 3, 3, 9
+; ASM64PWR4-DAG: add 3, 3, 10
+; ASM64PWR4-DAG: cmpwi 10, 1
+; ASM64PWR4-DAG: addi [[SCRATCHREG:[0-9]+]], 1, 112
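
Reviewer note (not part of the patch): the heavily vectorized IR in the two .ll tests can obscure the convention being implemented. On AIX, every variadic argument occupies one register-sized slot in the caller's parameter save area (4 bytes on 32-bit, 8 bytes on 64-bit), and the fixed GPRs r3-r10 / x3-x10 are spilled to the start of that area so va_arg can walk it linearly; because the target is big-endian, an int is right-justified in an 8-byte slot, which is why the 64BIT checks load through %argp.cur + 4. A minimal C sketch of what the tests exercise, under those assumptions (sum_ints is a hypothetical name, not taken from the patch):

    #include <stdarg.h>

    /* Sums 'count' ints passed variadically. On 64-bit AIX each
       va_arg(ap, int) advances ap by one 8-byte slot and reads the
       high-address 4 bytes of the slot (right-justified, big-endian);
       on 32-bit AIX each slot is 4 bytes and holds the int directly. */
    int sum_ints(int count, ...) {
      va_list ap;
      va_start(ap, count);
      int total = 0;
      for (int i = 0; i < count; ++i)
        total += va_arg(ap, int);
      va_end(ap);
      return total;
    }

Compiled with -O2 for powerpc64-ibm-aix-xcoff, this produces the same shape of code as va_arg1 above: eight std instructions spilling x3-x10 into the parameter save area, followed by a loop that reads each slot at offset +4.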