diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -153,10 +153,6 @@ /// base pointer. unsigned getBasePointerSaveOffset() const; - /// getCRSaveOffset - Return the previous frame offset to save the - /// CR register. - unsigned getCRSaveOffset() const { return CRSaveOffset; } - /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// unsigned getLinkageSize() const { return LinkageSize; } diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -79,9 +79,8 @@ : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; } -static unsigned computeCRSaveOffset() { - // The condition register save offset needs to be updated for AIX PPC32. - return 8; +static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { + return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; } PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) @@ -92,7 +91,7 @@ FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), LinkageSize(computeLinkageSize(Subtarget)), BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), - CRSaveOffset(computeCRSaveOffset()) {} + CRSaveOffset(computeCRSaveOffset(Subtarget)) {} // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( @@ -760,7 +759,8 @@ MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); DebugLoc dl; - bool needsCFI = MF.needsFrameMoves(); + // AIX assembler does not support cfi directives. + bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); // Get processor type. 
bool isPPC64 = Subtarget.isPPC64(); @@ -831,6 +831,9 @@ : PPC::SUBFC); const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 : PPC::SUBFIC); + const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 + : PPC::MFCR); + const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); // Regarding this assert: Even though LR is saved in the caller's frame (i.e., // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no @@ -892,12 +895,6 @@ // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. bool isLargeFrame = !isInt<16>(NegFrameSize); - assert((isPPC64 || !MustSaveCR) && - "Prologue CR saving supported only in 64-bit mode"); - - if (MustSaveCR && isAIXABI) - report_fatal_error("Prologue CR saving is unimplemented on AIX."); - // Check if we can move the stack update instruction (stdu) down the prologue // past the callee saves. Hopefully this will avoid the situation where the // saves are waiting for the update on the store with update to complete. @@ -944,20 +941,20 @@ // If only one or two CR fields are clobbered, it is more efficient to use // mfocrf to selectively save just those fields, because mfocrf has short // latency compares to mfcr. 
- unsigned MfcrOpcode = PPC::MFCR8; - unsigned CrState = RegState::ImplicitKill; if (isELFv2ABI && MustSaveCRs.size() == 1) { - MfcrOpcode = PPC::MFOCRF8; - CrState = RegState::Kill; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); + MIB.addReg(MustSaveCRs[0], RegState::Kill); + } else { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); + for (unsigned CRfield : MustSaveCRs) + MIB.addReg(CRfield, RegState::ImplicitKill); } - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); - for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - MIB.addReg(MustSaveCRs[i], CrState); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) - .addReg(TempReg, getKillRegState(true)) - .addImm(getCRSaveOffset()) - .addReg(SPReg); + BuildMI(MBB, MBBI, dl, StoreWordInst) + .addReg(TempReg, getKillRegState(true)) + .addImm(CRSaveOffset) + .addReg(SPReg); } if (MustSaveLR) @@ -969,16 +966,16 @@ // If only one or two CR fields are clobbered, it is more efficient to use // mfocrf to selectively save just those fields, because mfocrf has short // latency compares to mfcr. 
- unsigned MfcrOpcode = PPC::MFCR8; - unsigned CrState = RegState::ImplicitKill; if (isELFv2ABI && MustSaveCRs.size() == 1) { - MfcrOpcode = PPC::MFOCRF8; - CrState = RegState::Kill; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); + MIB.addReg(MustSaveCRs[0], RegState::Kill); + } else { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); + for (unsigned CRfield : MustSaveCRs) + MIB.addReg(CRfield, RegState::ImplicitKill); } - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); - for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - MIB.addReg(MustSaveCRs[i], CrState); } if (HasRedZone) { @@ -1008,9 +1005,9 @@ if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 assert(HasRedZone && "A red zone is always available on PPC64"); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + BuildMI(MBB, MBBI, dl, StoreWordInst) .addReg(TempReg, getKillRegState(true)) - .addImm(getCRSaveOffset()) + .addImm(CRSaveOffset) .addReg(SPReg); } @@ -1314,7 +1311,7 @@ // actually saved gets its own CFI record. unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset())); + nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); continue; @@ -1395,7 +1392,10 @@ : PPC::ADDI ); const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 : PPC::ADD4 ); - + const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 + : PPC::LWZ); + const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 + : PPC::MTOCRF); int LROffset = getReturnSaveOffset(); int FPOffset = 0; @@ -1570,20 +1570,17 @@ // value (although not the base register). Make sure it is not overwritten // too early. 
- assert((isPPC64 || !MustSaveCR) && - "Epilogue CR restoring supported only in 64-bit mode"); - // If we need to restore both the LR and the CR and we only have one // available scratch register, we must do them one at a time. if (MustSaveCR && SingleScratchReg && MustSaveLR) { // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg // is live here. assert(HasRedZone && "Expecting red zone"); - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) - .addImm(getCRSaveOffset()) + BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) + .addImm(CRSaveOffset) .addReg(SPReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) + BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) .addReg(TempReg, getKillRegState(i == e-1)); } @@ -1601,10 +1598,9 @@ if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { // This will only occur for PPC64. - assert(isPPC64 && "Expecting 64-bit mode"); assert(RBReg == SPReg && "Should be using SP as a base register"); - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) - .addImm(getCRSaveOffset()) + BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) + .addImm(CRSaveOffset) .addReg(RBReg); } @@ -1661,7 +1657,7 @@ if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) + BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) @@ -1805,8 +1801,7 @@ // save and restore of the condition register will be created as part of the // prologue and epilogue insertion, but the FixedStack object is needed to // keep the CalleSavedInfo valid. 
- if (Subtarget.isSVR4ABI() && - (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || + if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || SavedRegs.test(PPC::CR4))) { const uint64_t SpillSize = 4; // Condition register is always 4 bytes. const int64_t SpillOffset = Subtarget.isPPC64() ? 8 : -4; @@ -2166,11 +2161,6 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, const TargetRegisterInfo *TRI) const { - // Currently, this function only handles SVR4 32- and 64-bit ABIs. - // Return false otherwise to maintain pre-existing behavior. - if (!Subtarget.isSVR4ABI()) - return false; - MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); PPCFunctionInfo *FI = MF->getInfo(); @@ -2210,7 +2200,7 @@ // Insert the spill to the stack frame. if (IsCRField) { PPCFunctionInfo *FuncInfo = MF->getInfo(); - if (Subtarget.isPPC64()) { + if (!Subtarget.is32BitELFABI()) { // The actual spill will happen at the start of the prologue. FuncInfo->addMustSaveCR(Reg); } else { @@ -2253,39 +2243,36 @@ return true; } -static void restoreCRs(bool isPPC64, bool is31, bool CR2Spilled, - bool CR3Spilled, bool CR4Spilled, MachineBasicBlock &MBB, +static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, + bool CR4Spilled, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - ArrayRef CSI, unsigned CSIIndex) { + const std::vector &CSI, + unsigned CSIIndex) { MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); DebugLoc DL; unsigned RestoreOp, MoveReg; - if (isPPC64) - // This is handled during epilogue generation. 
- return; - else { - // 32-bit: FP-relative - MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), - PPC::R12), - CSI[CSIIndex].getFrameIdx())); - RestoreOp = PPC::MTOCRF; - MoveReg = PPC::R12; - } + // 32-bit: FP-relative + MBB.insert(MI, + addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::R12), + CSI[CSIIndex].getFrameIdx())); + RestoreOp = PPC::MTOCRF; + MoveReg = PPC::R12; if (CR2Spilled) - MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) - .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); + MBB.insert( + MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) + .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); if (CR3Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) - .addReg(MoveReg, getKillRegState(!CR4Spilled))); + .addReg(MoveReg, getKillRegState(!CR4Spilled))); if (CR4Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) - .addReg(MoveReg, getKillRegState(true))); + .addReg(MoveReg, getKillRegState(true))); } MachineBasicBlock::iterator PPCFrameLowering:: @@ -2332,12 +2319,6 @@ MachineBasicBlock::iterator MI, std::vector &CSI, const TargetRegisterInfo *TRI) const { - - // Currently, this function only handles SVR4 32- and 64-bit ABIs. - // Return false otherwise to maintain pre-existing behavior. - if (!Subtarget.isSVR4ABI()) - return false; - MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); PPCFunctionInfo *FI = MF->getInfo(); @@ -2378,14 +2359,13 @@ CR4Spilled = true; continue; } else { - // When we first encounter a non-CR register after seeing at + // On 32-bit ELF when we first encounter a non-CR register after seeing at // least one CR register, restore all spilled CRs together. 
- if ((CR2Spilled || CR3Spilled || CR4Spilled) - && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + if ((CR2Spilled || CR3Spilled || CR4Spilled) && + !(PPC::CR2 <= Reg && Reg <= PPC::CR4) && Subtarget.is32BitELFABI()) { bool is31 = needsFP(*MF); - restoreCRs(Subtarget.isPPC64(), is31, - CR2Spilled, CR3Spilled, CR4Spilled, - MBB, I, CSI, CSIIndex); + restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, + CSIIndex); CR2Spilled = CR3Spilled = CR4Spilled = false; } @@ -2421,11 +2401,10 @@ } } - // If we haven't yet spilled the CRs, do so now. - if (CR2Spilled || CR3Spilled || CR4Spilled) { + // If we haven't yet spilled the CRs on 32-bit ELF, do so now. + if ((CR2Spilled || CR3Spilled || CR4Spilled) && Subtarget.is32BitELFABI()) { bool is31 = needsFP(*MF); - restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, - MBB, I, CSI, CSIIndex); + restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); } return true; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -926,14 +926,15 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { - const PPCSubtarget &Subtarget = MF.getSubtarget(); - // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 - // ABI, return true to prevent allocating an additional frame slot. - // For 64-bit, the CR save area is in the linkage area at SP+8; but we have - // created a FrameIndex to that spill slot to keep the CalleSaveInfos valid. - // For 32-bit, we have previously created the stack slot if needed, so return - // its FrameIdx. - if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { + // For the nonvolatile condition registers (CR2, CR3, CR4) return true to + // prevent allocating an additional frame slot. 
+ // For 64-bit ELF and AIX, the CR save area is in the linkage area at SP+8, + // for 32-bit AIX the CR save area is in the linkage area at SP+4. + // We have created a FrameIndex to that spill slot to keep the CalleeSavedInfos + // valid. + // For 32-bit ELF, we have previously created the stack slot if needed, so + // return its FrameIdx. + if (PPC::CR2 <= Reg && Reg <= PPC::CR4) { FrameIdx = MF.getInfo()->getCRSpillFrameIndex(); return true; } } diff --git a/llvm/test/CodeGen/PowerPC/ppc64-alloca-crspill.ll b/llvm/test/CodeGen/PowerPC/ppc64-alloca-crspill.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-alloca-crspill.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-alloca-crspill.ll @@ -1,5 +1,12 @@ ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu --verify-machineinstrs \ -; RUN: -stop-after=prologepilog < %s | FileCheck %s +; RUN: -stop-after=prologepilog < %s | FileCheck --check-prefixes=CHECK,ELFV2 %s + +; RUN: llc -mtriple=powerpc64-unknown-aix-xcoff -mcpu=pwr4 \ +; RUN: --verify-machineinstrs --mattr=-altivec -stop-after=prologepilog < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,V1ANDAIX %s + +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 --verify-machineinstrs \ +; RUN: -stop-after=prologepilog < %s | FileCheck --check-prefixes=CHECK,V1ANDAIX %s define dso_local signext i32 @test(i32 signext %n) { entry: @@ -16,8 +23,12 @@ ; CHECK: alignment: 16 ; CHECK: liveins: ; CHECK: - { reg: '$x3', virtual-reg: '' } -; CHECK: stackSize: 48 -; CHECK: maxCallFrameSize: 32 + +; ELFV2: stackSize: 48 +; V1ANDAIX: stackSize: 128 + +; ELFV2: maxCallFrameSize: 32 +; V1ANDAIX: maxCallFrameSize: 112 ; CHECK: fixedStack: ; CHECK-NEXT: - { id: 0, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, @@ -41,18 +52,23 @@ ; CHECK-NEXT: liveins: $x3, $cr2 ; Prologue: -; CHECK: $x0 = MFLR8 implicit $lr8 -; CHECK-NEXT: $x12 = MFOCRF8 killed $cr2 -; CHECK-DAG: STD $x31, -8, $x1 -; CHECK-DAG: STD killed $x0, 16, $x1 -; CHECK-DAG: STW8 killed $x12, 8, $x1 -; CHECK-NEXT: 
$x1 = STDU $x1, -48, $x1 -; CHECK: $x31 = OR8 $x1, $x1 - -; CHECK: $[[ORIGSP:x[0-9]+]] = ADDI8 $x31, 48 +; CHECK: $x0 = MFLR8 implicit $lr8 +; ELFV2-NEXT: $x12 = MFOCRF8 killed $cr2 +; V1ANDAIX-NEXT: $x12 = MFCR8 implicit killed $cr2 +; CHECK-DAG: STD $x31, -8, $x1 +; CHECK-DAG: STD killed $x0, 16, $x1 +; CHECK-DAG: STW8 killed $x12, 8, $x1 + +; ELFV2-NEXT: $x1 = STDU $x1, -48, $x1 +; V1ANDAIX-NEXT: $x1 = STDU $x1, -128, $x1 + +; CHECK: $x31 = OR8 $x1, $x1 + +; ELFV2: $[[ORIGSP:x[0-9]+]] = ADDI8 $x31, 48 +; V1ANDAIX: $[[ORIGSP:x[0-9]+]] = ADDI8 $x31, 128 ; CHECK: $x1 = STDUX killed $[[ORIGSP]], $x1, killed $x{{[0-9]}} ; CHECK: INLINEASM {{.*}} early-clobber $cr2 -; CHECK: BL8_NOP @do_something +; CHECK: BL8_NOP ; Epilogue: diff --git a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir --- a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir +++ b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir @@ -1,11 +1,19 @@ # RUN: llc -mtriple powerpc64le-unknown-linux-gnu -x mir -mcpu=pwr8 \ # RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \ -# RUN: FileCheck %s --check-prefixes=CHECK,PWR8 +# RUN: FileCheck %s --check-prefixes=CHECK,PWR8,ELF # RUN: llc -mtriple powerpc64-unknown-linux-gnu -x mir -mcpu=pwr7 \ # RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK,PWR7,ELF + + +# RUN: llc -mtriple powerpc64-unknown-aix-xcoff -x mir -mcpu=pwr7 \ +# RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \ # RUN: FileCheck %s --check-prefixes=CHECK,PWR7 +# TODO FIXME: We only check the save and restores of the callee saved gpr for +# ELF because AIX callee saved registers haven't been properly implemented yet. 
+ --- name: CRAllSave alignment: 16 @@ -25,10 +33,10 @@ ; CHECK: liveins: $x3, $x29, $cr2, $cr4 ; CHECK: $x12 = MFCR8 implicit killed $cr2, implicit killed $cr4 - ; CHECK-DAG: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0) + ; ELF-DAG: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0) ; CHECK-DAG: STW8 killed $x12, 8, $x1 - ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0) + ; ELF: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0) ; CHECK: $x12 = LWZ8 8, $x1 ; CHECK: $cr2 = MTOCRF8 $x12 ; CHECK: $cr4 = MTOCRF8 killed $x12 @@ -56,10 +64,10 @@ ; PWR8: $x12 = MFOCRF8 killed $cr2 ; PWR7: $x12 = MFCR8 implicit killed $cr2 - ; CHECK-DAG: STD killed $x14, -144, $x1 :: (store 8 into %fixed-stack.0, align 16) + ; ELF-DAG: STD killed $x14, -144, $x1 :: (store 8 into %fixed-stack.0, align 16) ; CHECK-DAG: STW8 killed $x12, 8, $x1 - ; CHECK: $x14 = LD -144, $x1 :: (load 8 from %fixed-stack.0, align 16) + ; ELF: $x14 = LD -144, $x1 :: (load 8 from %fixed-stack.0, align 16) ; CHECK: $x12 = LWZ8 8, $x1 ; CHECK: $cr2 = MTOCRF8 killed $x12