diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -626,8 +626,6 @@ // Work out frame sizes. uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); int64_t NegFrameSize = -FrameSize; - if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)) - llvm_unreachable("Unhandled stack size!"); if (MFI.isFrameAddressTaken()) replaceFPWithRealFP(MF); @@ -667,6 +665,8 @@ : PPC::ORI ); const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR ); + const MCInstrDesc& OrImmShiftedInst = TII.get(isPPC64 ? PPC::ORIS8 + : PPC::ORIS); const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC); const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 @@ -934,11 +934,30 @@ .addImm(NegFrameSize); } else { assert(!SingleScratchReg && "Only a single scratch reg available"); - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) + if (isInt<32>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) + BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) .addReg(TempReg, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); + } else { + assert(isPPC64 && "Huge frame is only supported on PPC64"); + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) + .addImm(NegFrameSize >> 48); + BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) + .addReg(TempReg, RegState::Kill) + .addImm((NegFrameSize >> 32) & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICR), TempReg) + .addReg(TempReg, RegState::Kill) + .addImm(32) + .addImm(31); + BuildMI(MBB, MBBI, dl, OrImmShiftedInst, TempReg) + .addReg(TempReg, RegState::Kill) + .addImm((NegFrameSize >> 16) & 0xFFFF); + BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) + .addReg(TempReg, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + } BuildMI(MBB, MBBI, dl, SubtractCarryingInst, 
ScratchReg) .addReg(ScratchReg, RegState::Kill) .addReg(TempReg, RegState::Kill); @@ -957,11 +976,30 @@ .addReg(SPReg); } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + if (isInt<32>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); + } else { + assert(isPPC64 && "Huge frame is only supported on PPC64"); + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + .addImm(NegFrameSize >> 48); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm((NegFrameSize >> 32) & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICR), ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(32) + .addImm(31); + BuildMI(MBB, MBBI, dl, OrImmShiftedInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm((NegFrameSize >> 16) & 0xFFFF); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + } BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) .addReg(SPReg, RegState::Kill) .addReg(SPReg) @@ -1582,6 +1620,8 @@ : PPC::OR ); const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 : PPC::ORI ); + const MCInstrDesc &OrImmShiftedInst = + TII.get(isPPC64 ? PPC::ORIS8 : PPC::ORIS); const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 : PPC::ADDI ); const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 @@ -1667,7 +1707,7 @@ // values from the stack, and set SPAdd to the value that needs to be added // to the SP at the end. The default values are as if red zone was present. unsigned RBReg = SPReg; - unsigned SPAdd = 0; + uint64_t SPAdd = 0; // Check if we can move the stack update instruction up the epilogue // past the callee saves. 
This will allow the move to LR instruction @@ -1725,11 +1765,29 @@ BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) .addReg(FPReg).addImm(FrameSize); } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) - .addReg(ScratchReg, RegState::Kill) - .addImm(FrameSize & 0xFFFF); + if (isInt<32>(FrameSize)) { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + .addImm(FrameSize >> 16); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(FrameSize & 0xFFFF); + } else { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + .addImm(FrameSize >> 48); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm((FrameSize >> 32) & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICR), ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(32) + .addImm(31); + BuildMI(MBB, MBBI, dl, OrImmShiftedInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm((FrameSize >> 16) & 0xFFFF); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(FrameSize & 0xFFFF); + } BuildMI(MBB, MBBI, dl, AddInst) .addReg(RBReg) .addReg(FPReg) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -188,7 +188,7 @@ } /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { + inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } @@ -202,7 +202,7 @@ /// base register. Return the virtual register that holds this value. 
SDNode *getGlobalBaseReg(); - void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); + void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. @@ -626,7 +626,7 @@ && isInt32Immediate(N->getOperand(1).getNode(), Imm); } -void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { +void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) { SDLoc dl(SN); int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); @@ -5241,7 +5241,7 @@ // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } } @@ -5299,7 +5299,7 @@ int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1422,7 +1422,7 @@ OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC); // Now add the frame object offset to the offset from r1. - int Offset = MFI.getObjectOffset(FrameIndex); + int64_t Offset = MFI.getObjectOffset(FrameIndex); Offset += MI.getOperand(OffsetOperandNo).getImm(); // If we're not using a Frame Pointer that has been set to the value of the @@ -1484,11 +1484,30 @@ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg) .addImm(Offset); else { - BuildMI(MBB, II, dl, TII.get(is64Bit ? 
PPC::LIS8 : PPC::LIS), SRegHi) - .addImm(Offset >> 16); - BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) - .addReg(SRegHi, RegState::Kill) - .addImm(Offset); + if (isInt<32>(Offset)) { + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi) + .addImm(Offset >> 16); + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) + .addReg(SRegHi, RegState::Kill) + .addImm(Offset); + } else { + assert(is64Bit && "Huge stack is only supported on PPC64"); + BuildMI(MBB, II, dl, TII.get(PPC::LIS8) , SReg) + .addImm(Offset >> 48); + BuildMI(MBB, II, dl, TII.get(PPC::ORI8), SReg) + .addReg(SReg, RegState::Kill) + .addImm((Offset >> 32) & 0xFFFF); + BuildMI(MBB, II, dl, TII.get(PPC::RLDICR), SReg) + .addReg(SReg, RegState::Kill) + .addImm(32) + .addImm(31); + BuildMI(MBB, II, dl, TII.get(PPC::ORIS8), SReg) + .addReg(SReg, RegState::Kill) + .addImm((Offset >> 16) & 0xFFFF); + BuildMI(MBB, II, dl, TII.get(PPC::ORI8), SReg) + .addReg(SReg, RegState::Kill) + .addImm(Offset & 0xFFFF); + } } // Convert into indexed form of the instruction: diff --git a/llvm/test/CodeGen/PowerPC/huge-frame-call.ll b/llvm/test/CodeGen/PowerPC/huge-frame-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/huge-frame-call.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; REQUIRES: asserts +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ +; RUN: 2>&1 | FileCheck --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s \ +; RUN: 2>&1 | FileCheck --check-prefix=CHECK-BE %s + +%0 = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [8 x i8] }> +@global.1 = internal global %0 <{ i32 129, i32 2, i32 118, i32 0, i32 5, i32 0, i32 0, i32 0, i32 120, i32 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @global.2, i32 0, i32 0), [8 x i8] c"\00\00\00\00\00\00\00\03" }>, align 4 +@global.2 = internal constant [3 x 
i8] c"x.c" +@alias = dso_local alias i32 (), i32 ()* @main + +define dso_local signext i32 @main() { +; CHECK-LE-LABEL: main: +; CHECK-LE: # %bb.0: # %bb +; CHECK-LE-NEXT: mflr 0 +; CHECK-LE-NEXT: std 0, 16(1) +; CHECK-LE-NEXT: lis 0, -1 +; CHECK-LE-NEXT: ori 0, 0, 65535 +; CHECK-LE-NEXT: sldi 0, 0, 32 +; CHECK-LE-NEXT: oris 0, 0, 32767 +; CHECK-LE-NEXT: ori 0, 0, 65120 +; CHECK-LE-NEXT: stdux 1, 1, 0 +; CHECK-LE-NEXT: .cfi_def_cfa_offset -2147483232 +; CHECK-LE-NEXT: .cfi_offset lr, 16 +; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: lis 3, 0 +; CHECK-LE-NEXT: ori 3, 3, 0 +; CHECK-LE-NEXT: sldi 3, 3, 32 +; CHECK-LE-NEXT: oris 3, 3, 32768 +; CHECK-LE-NEXT: ori 3, 3, 400 +; CHECK-LE-NEXT: stdx 30, 1, 3 # 8-byte Folded Spill +; CHECK-LE-NEXT: bl pluto +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: addis 3, 2, global.1@toc@ha +; CHECK-LE-NEXT: li 4, 0 +; CHECK-LE-NEXT: li 7, 0 +; CHECK-LE-NEXT: li 8, 0 +; CHECK-LE-NEXT: li 9, 0 +; CHECK-LE-NEXT: addi 5, 3, global.1@toc@l +; CHECK-LE-NEXT: ori 6, 4, 32768 +; CHECK-LE-NEXT: li 3, 6 +; CHECK-LE-NEXT: li 4, 257 +; CHECK-LE-NEXT: bl snork +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr 30, 3 +; CHECK-LE-NEXT: li 3, 344 +; CHECK-LE-NEXT: addi 4, 1, 48 +; CHECK-LE-NEXT: li 5, 8 +; CHECK-LE-NEXT: li 6, 8 +; CHECK-LE-NEXT: oris 3, 3, 32768 +; CHECK-LE-NEXT: add 4, 4, 3 +; CHECK-LE-NEXT: mr 3, 30 +; CHECK-LE-NEXT: bl zot +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr 3, 30 +; CHECK-LE-NEXT: bl wibble +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: li 3, 0 +; CHECK-LE-NEXT: bl snork.3 +; CHECK-LE-NEXT: nop +; +; CHECK-BE-LABEL: main: +; CHECK-BE: # %bb.0: # %bb +; CHECK-BE-NEXT: mflr 0 +; CHECK-BE-NEXT: std 0, 16(1) +; CHECK-BE-NEXT: lis 0, -1 +; CHECK-BE-NEXT: ori 0, 0, 65535 +; CHECK-BE-NEXT: sldi 0, 0, 32 +; CHECK-BE-NEXT: oris 0, 0, 32767 +; CHECK-BE-NEXT: ori 0, 0, 65056 +; CHECK-BE-NEXT: stdux 1, 1, 0 +; CHECK-BE-NEXT: lis 3, 0 +; CHECK-BE-NEXT: ori 3, 3, 0 +; CHECK-BE-NEXT: sldi 3, 3, 32 +; CHECK-BE-NEXT: oris 3, 3, 32768 +; 
CHECK-BE-NEXT: ori 3, 3, 472 +; CHECK-BE-NEXT: stdx 31, 1, 3 # 8-byte Folded Spill +; CHECK-BE-NEXT: bl .pluto[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: ld 5, L..C0(2) # @global.1 +; CHECK-BE-NEXT: li 3, 0 +; CHECK-BE-NEXT: ori 6, 3, 32768 +; CHECK-BE-NEXT: li 3, 6 +; CHECK-BE-NEXT: li 4, 257 +; CHECK-BE-NEXT: li 7, 0 +; CHECK-BE-NEXT: li 8, 0 +; CHECK-BE-NEXT: li 9, 0 +; CHECK-BE-NEXT: bl .snork[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr 31, 3 +; CHECK-BE-NEXT: li 3, 344 +; CHECK-BE-NEXT: oris 3, 3, 32768 +; CHECK-BE-NEXT: addi 4, 1, 120 +; CHECK-BE-NEXT: add 4, 4, 3 +; CHECK-BE-NEXT: mr 3, 31 +; CHECK-BE-NEXT: li 5, 8 +; CHECK-BE-NEXT: li 6, 8 +; CHECK-BE-NEXT: bl .zot[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr 3, 31 +; CHECK-BE-NEXT: bl .wibble[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: li 3, 0 +; CHECK-BE-NEXT: bl .snork.3[PR] +; CHECK-BE-NEXT: nop +bb: + %tmp = alloca [2147484000 x i8], align 8 + tail call void @pluto() + %tmp6 = tail call i64 @snork(i64 6, i32 257, %0* nonnull @global.1, i64 32768, i8* null, i64 0, i8* null) + %tmp7 = getelementptr inbounds [2147484000 x i8], [2147484000 x i8]* %tmp, i64 0, i64 2147483992 + %tmp8 = bitcast i8* %tmp7 to double* + %tmp9 = call i64 @zot(i64 %tmp6, double* nonnull %tmp8, i64 8, i64 8) + %tmp10 = call i64 @wibble(i64 %tmp6) + call void @snork.3(i64 0) + unreachable +} + +declare void @pluto() + +declare signext i64 @snork(i64, i32, %0*, i64, i8*, i64, i8*) + +declare signext i64 @zot(i64, double*, i64, i64) + +declare signext i64 @wibble(i64) + +declare void @snork.3(i64) diff --git a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll --- a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll +++ b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll @@ -1,14 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts -; RUN: not --crash llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ +; RUN: llc 
-verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ ; RUN: 2>&1 | FileCheck --check-prefix=CHECK-LE %s -; RUN: not --crash llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s \ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s \ ; RUN: 2>&1 | FileCheck --check-prefix=CHECK-BE %s declare void @bar(i8*) define void @foo(i8 %x) { -; CHECK-LE: Unhandled stack size -; CHECK-BE: Unhandled stack size +; CHECK-LE-LABEL: foo: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lis 0, -1 +; CHECK-LE-NEXT: ori 0, 0, 65534 +; CHECK-LE-NEXT: sldi 0, 0, 32 +; CHECK-LE-NEXT: oris 0, 0, 65535 +; CHECK-LE-NEXT: ori 0, 0, 65504 +; CHECK-LE-NEXT: stdux 1, 1, 0 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LE-NEXT: stb 3, 32(1) +; CHECK-LE-NEXT: ld 1, 0(1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: foo: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lis 0, -1 +; CHECK-BE-NEXT: ori 0, 0, 65534 +; CHECK-BE-NEXT: sldi 0, 0, 32 +; CHECK-BE-NEXT: oris 0, 0, 65535 +; CHECK-BE-NEXT: ori 0, 0, 65488 +; CHECK-BE-NEXT: stdux 1, 1, 0 +; CHECK-BE-NEXT: stb 3, 48(1) +; CHECK-BE-NEXT: ld 1, 0(1) +; CHECK-BE-NEXT: blr entry: %a = alloca i8, i64 4294967296, align 16 %b = getelementptr i8, i8* %a, i64 0