diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -1318,6 +1318,7 @@ BCLalways, BCLn, BCTRL8_LDinto_toc, + BCTRL_LWZinto_toc, BCn, CTRL_DEP )>; diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2441,10 +2441,6 @@ } unsigned PPCFrameLowering::getTOCSaveOffset() const { - if (Subtarget.isAIXABI()) - // TOC save/restore is normally handled by the linker. - // Indirect calls should hit this limitation. - report_fatal_error("TOC save is not implemented on AIX yet."); return TOCSaveOffset; } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -174,7 +174,8 @@ BCTRL, /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl - /// instruction and the TOC reload required on SVR4 PPC64. + /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX + /// and 64-bit AIX. 
BCTRL_LOAD_TOC, /// Return with a flag operand, matched by 'blr' diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3153,11 +3153,17 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget.isAIXABI()) + report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX."); + return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget.isAIXABI()) + report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX."); + SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -5208,34 +5214,48 @@ MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); + // Registers used in building the DAG. + const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister(); + const MCRegister TOCReg = Subtarget.getTOCPointerRegister(); + + // Offsets of descriptor members. + const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset(); + const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset(); + + const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; + const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4; + // One load for the functions entry point address. - SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, - /* Alignment = */ 8, MMOFlags); + SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI, + Alignment, MMOFlags); // One for loading the TOC anchor for the module that contains the called // function. 
- SDValue TOCOff = DAG.getIntPtrConstant(8, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); + SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff); SDValue TOCPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), - /* Alignment = */ 8, MMOFlags); + DAG.getLoad(RegVT, dl, LDChain, AddTOC, + MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags); // One for loading the environment pointer. - SDValue PtrOff = DAG.getIntPtrConstant(16, dl); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); + SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff); SDValue LoadEnvPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), - /* Alignment = */ 8, MMOFlags); + DAG.getLoad(RegVT, dl, LDChain, AddPtr, + MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags); + // Then copy the newly loaded TOC anchor to the TOC pointer. - SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, Glue); + SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue); Chain = TOCVal.getValue(0); Glue = TOCVal.getValue(1); // If the function call has an explicit 'nest' parameter, it takes the // place of the environment pointer. 
+ assert((!hasNest || !Subtarget.isAIXABI()) && + "Nest parameter is not supported on AIX."); if (!hasNest) { - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, Glue); + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue); Chain = EnvVal.getValue(0); Glue = EnvVal.getValue(1); } @@ -5264,27 +5284,29 @@ Ops.push_back(Callee); else { assert(!isPatchPoint && "Patch point call are not indirect."); - if (Subtarget.isAIXABI()) - report_fatal_error("Indirect call on AIX is not implemented."); - - // For 64-bit ELF we have saved the TOC pointer to the linkage area on the - // stack (this would have been done in `LowerCall_64SVR4`). The call - // instruction is a pseudo instruction that represents both the indirect - // branch and a load that restores the TOC pointer from the linkage area. - // The operand for the TOC restore is an add of the TOC save offset to the - // stack pointer. This must be the second operand: after the chain input but - // before any other variadic arguments. - if (Subtarget.is64BitELFABI()) { - SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); + + // For the TOC based ABIs, we have saved the TOC pointer to the linkage area + // on the stack (this would have been done in `LowerCall_64SVR4` or + // `LowerCall_AIX`). The call instruction is a pseudo instruction that + // represents both the indirect branch and a load that restores the TOC + // pointer from the linkage area. The operand for the TOC restore is an add + // of the TOC save offset to the stack pointer. This must be the second + // operand: after the chain input but before any other variadic arguments. 
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { + const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); + + SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff); Ops.push_back(AddTOC); } // Add the register used for the environment pointer. if (Subtarget.usesFunctionDescriptors() && !hasNest) - Ops.push_back(DAG.getRegister(PPC::X11, MVT::i64)); + Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(), + RegVT)); + // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -5305,7 +5327,7 @@ // no way to mark dependencies as implicit here. // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint) - Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::X2 : PPC::R2, RegVT)); + Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.is32BitELFABI()) @@ -6961,9 +6983,6 @@ if (isVarArg || isPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); - if (!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) - report_fatal_error("Handling of indirect call is unimplemented!"); - const PPCSubtarget& Subtarget = static_cast<const PPCSubtarget&>(DAG.getSubtarget()); if (Subtarget.hasQPX()) @@ -7022,6 +7041,26 @@ "unimplemented!"); } + // For indirect calls, we need to save the TOC base to the stack for + // restoration after the call.
+ if (!isTailCall && !isPatchPoint && + !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { + const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister(); + const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); + const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; + const unsigned TOCSaveOffset = + Subtarget.getFrameLowering()->getTOCSaveOffset(); + + setUsesTOCBasePtr(DAG); + SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, AddPtr, + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); + } + // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -1529,6 +1529,29 @@ let BH = 0; } +class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10> xo1, bits<5> bo, + bits<5> bi, bit lk, bits<6> opcode2, dag OOL, + dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> { + + bits<5> RST; + bits<21> D_RA; + + let Pattern = pattern; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-18} = 0; + let Inst{19-20} = 0; // Unused (BH) + let Inst{21-30} = xo1; + let Inst{31} = lk; + + let Inst{38-42} = RST; + let Inst{43-47} = D_RA{20-16}; // Base Register + let Inst{48-63} = D_RA{15-0}; // Displacement +} + // 1.7.8 XFX-Form class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++
b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1645,6 +1645,15 @@ "#TC_RETURNr $dst $offset", []>; +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR, R2], Uses = [CTR, RM], RST = 2 in { + def BCTRL_LWZinto_toc: + XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs), + (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB, + [(PPCbctrl_load_toc iaddr:$src)]>, Requires<[In32BitMode]>; + +} + let isCodeGenOnly = 1 in { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -358,6 +358,34 @@ return isAIXABI() || (is64BitELFABI() && !isELFv2ABI()); } + unsigned descriptorTOCAnchorOffset() const { + assert(usesFunctionDescriptors() && + "Should only be called when the target uses descriptors."); + return IsPPC64 ? 8 : 4; + } + + unsigned descriptorEnvironmentPointerOffset() const { + assert(usesFunctionDescriptors() && + "Should only be called when the target uses descriptors."); + return IsPPC64 ? 16 : 8; + } + + MCRegister getEnvironmentPointerRegister() const { + assert(usesFunctionDescriptors() && + "Should only be called when the target uses descriptors."); + return IsPPC64 ? PPC::X11 : PPC::R11; + } + + MCRegister getTOCPointerRegister() const { + assert((is64BitELFABI() || isAIXABI()) && + "Should only be called when the target is a TOC based ABI."); + return IsPPC64 ? PPC::X2 : PPC::R2; + } + + MCRegister getStackPointerRegister() const { + return IsPPC64 ? 
PPC::X1 : PPC::R1; + } + bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } }; } // End llvm namespace diff --git a/llvm/test/CodeGen/PowerPC/aix-trampoline.ll b/llvm/test/CodeGen/PowerPC/aix-trampoline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-trampoline.ll @@ -0,0 +1,14 @@ +; RUN: not llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: INIT_TRAMPOLINE operation is not supported on AIX. + +define void @create_trampoline(i8* %buffer, i8* %nval) nounwind { +entry: + call void @llvm.init.trampoline(i8* %buffer, i8* bitcast (i32 (i32)* @nested to i8*) , i8* %nval) + ret void +} + +declare i32 @nested(i32); + +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind diff --git a/llvm/test/CodeGen/PowerPC/aix_indirect_call.ll b/llvm/test/CodeGen/PowerPC/aix_indirect_call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix_indirect_call.ll @@ -0,0 +1,143 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ +; RUN: FileCheck --check-prefixes=CHECKMIR,MIR32 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=CHECKASM,ASMOBJ32,ASM32 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ +; RUN: FileCheck --check-prefixes=CHECKMIR,MIR64 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=CHECKASM,ASM64 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff -filetype=obj < %s -o %t +; RUN: llvm-objdump -d %t | FileCheck --check-prefixes=ASMOBJ32,OBJ32 %s + +define signext i32 
@callThroughPtr(i32 ()* nocapture) { + %2 = tail call signext i32 %0() + ret i32 %2 +} + +; CHECKMIR: name: callThroughPtr + +; MIR32: liveins: $r3 +; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; MIR32-DAG: STW $r2, 20, $r1 +; MIR32-DAG: renamable $r11 = LWZ 8, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 8) +; MIR32-DAG: renamable $[[REG:r[0-9]+]] = LWZ 0, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0) +; MIR32-DAG: $r2 = LWZ 4, killed renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 4) +; MIR32-DAG: MTCTR killed renamable $[[REG]], implicit-def $ctr +; MIR32-NEXT: BCTRL_LWZinto_toc 20, $r1, csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $ctr, implicit $rm, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 +; MIR32-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; MIR64: liveins: $x3 +; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; MIR64-DAG: STD $x2, 40, $x1 :: (store 8 into stack + 40) +; MIR64-DAG: renamable $x11 = LD 16, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 16) +; MIR64-DAG: renamable $[[REG:x[0-9]+]] = LD 0, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0) +; MIR64-DAG: $x2 = LD 8, killed renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 8) +; MIR64-DAG: MTCTR8 killed renamable $[[REG]], implicit-def $ctr8 +; MIR64-NEXT: BCTRL8_LDinto_toc 40, $x1, csr_aix64, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 +; MIR64-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .callThroughPtr: + +; ASM32: stwu 1, -64(1) +; ASM32-DAG: lwz [[REG:[0-9]+]], 0(3) +; ASM32-DAG: stw 2, 20(1) +; ASM32-DAG: mtctr [[REG]] +; ASM32-DAG: lwz 11, 8(3) +; ASM32-DAG: lwz 2, 4(3) +; ASM32-NEXT: bctrl +; ASM32-NEXT: lwz 2, 20(1) +; ASM32-NEXT: addi 1, 1, 64 + 
+; ASM64: stdu 1, -112(1) +; ASM64-DAG: ld [[REG:[0-9]+]], 0(3) +; ASM64-DAG: std 2, 40(1) +; ASM64-DAG: mtctr [[REG]] +; ASM64-DAG: ld 11, 16(3) +; ASM64-DAG: ld 2, 8(3) +; ASM64-NEXT: bctrl +; ASM64-NEXT: ld 2, 40(1) +; ASM64-NEXT: addi 1, 1, 112 + +; OBJ32-LABEL: .text: +; OBJ32: stwu 1, -64(1) +; OBJ32-DAG: lwz [[REG:[0-9]+]], 0(3) +; OBJ32-DAG: stw 2, 20(1) +; OBJ32-DAG: mtctr [[REG]] +; OBJ32-DAG: lwz 11, 8(3) +; OBJ32-DAG: lwz 2, 4(3) +; OBJ32-NEXT: 4e 80 04 21 bctrl +; OBJ32-NEXT: 80 41 00 14 lwz 2, 20(1) +; OBJ32-NEXT: addi 1, 1, 64 + +define void @callThroughPtrWithArgs(void (i32, i16, i64)* nocapture) { + tail call void %0(i32 signext 1, i16 zeroext 2, i64 3) + ret void +} + +; CHECKMIR: name: callThroughPtrWithArgs + +; MIR32: liveins: $r3 +; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; MIR32-DAG: renamable $[[REG:r[0-9]+]] = LWZ 0, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0) +; MIR32-DAG: MTCTR killed renamable $[[REG]], implicit-def $ctr +; MIR32-DAG: STW $r2, 20, $r1 :: (store 4 into stack + 20) +; MIR32-DAG: renamable $r11 = LWZ 8, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 8) +; MIR32-DAG: $r2 = LWZ 4, killed renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 4) +; MIR32-DAG: $r3 = LI 1 +; MIR32-DAG: $r4 = LI 2 +; MIR32-DAG: $r5 = LI 0 +; MIR32-DAG: $r6 = LI 3 +; MIR32-NEXT: BCTRL_LWZinto_toc 20, $r1, csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $ctr, implicit $rm, implicit $r11, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r2, implicit-def $r1 +; MIR32-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; MIR64: liveins: $x3 +; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; MIR64-DAG: renamable $[[REG:x[0-9]+]] = LD 0, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0) +; MIR64-DAG: MTCTR8 killed renamable $[[REG]], implicit-def $ctr8 +; MIR64-DAG: STD $x2, 40, $x1 :: (store 8 
into stack + 40) +; MIR64-DAG: renamable $x11 = LD 16, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 16) +; MIR64-DAG: $x2 = LD 8, killed renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 8) +; MIR64-DAG: $x3 = LI8 1 +; MIR64-DAG: $x4 = LI8 2 +; MIR64-DAG: $x5 = LI8 3 +; MIR64-NEXT: BCTRL8_LDinto_toc 40, $x1, csr_aix64, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x11, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1 +; MIR64-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .callThroughPtrWithArgs: +; CHECKOBJ-LABEL: .callThroughPtrWithArgs: + +; ASMOBJ32: stwu 1, -64(1) +; ASMOBJ32-DAG: lwz [[REG:[0-9]+]], 0(3) +; ASMOBJ32-DAG: li 5, 0 +; ASMOBJ32-DAG: li 6, 3 +; ASMOBJ32-DAG: stw 2, 20(1) +; ASMOBJ32-DAG: mtctr [[REG]] +; ASMOBJ32-DAG: li 4, 2 +; ASMOBJ32-DAG: lwz 11, 8(3) +; ASMOBJ32-DAG: lwz 2, 4(3) +; ASMOBJ32-DAG: li 3, 1 +; ASMOBJ32-NEXT: bctrl +; ASMOBJ32-NEXT: lwz 2, 20(1) +; ASMOBJ32-NEXT: addi 1, 1, 64 + +; ASM64: stdu 1, -112(1) +; ASM64-DAG: ld [[REG:[0-9]+]], 0(3) +; ASM64-DAG: li 5, 3 +; ASM64-DAG: std 2, 40(1) +; ASM64-DAG: mtctr [[REG]] +; ASM64-DAG: li 4, 2 +; ASM64-DAG: ld 11, 16(3) +; ASM64-DAG: ld 2, 8(3) +; ASM64-DAG: li 3, 1 +; ASM64-NEXT: bctrl +; ASM64-NEXT: ld 2, 40(1) +; ASM64-NEXT: addi 1, 1, 112