diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -626,7 +626,7 @@ // Work out frame sizes. uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); int64_t NegFrameSize = -FrameSize; - if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)) + if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))) llvm_unreachable("Unhandled stack size!"); if (MFI.isFrameAddressTaken()) @@ -661,10 +661,6 @@ : PPC::STWU ); const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX); - const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 - : PPC::LIS ); - const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 - : PPC::ORI ); const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR ); const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 @@ -935,11 +931,7 @@ .addImm(NegFrameSize); } else { assert(!SingleScratchReg && "Only a single scratch reg available"); - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) - .addReg(TempReg, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); + TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize); BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addReg(TempReg, RegState::Kill); @@ -958,11 +950,7 @@ .addReg(SPReg); } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) - .addReg(ScratchReg, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); + TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize); BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) .addReg(SPReg, RegState::Kill) .addReg(SPReg) @@ -1669,7 +1657,7 @@ // values from the stack, and set SPAdd to the value that needs to be added // to the SP at the end. The default values are as if red zone was present. unsigned RBReg = SPReg; - unsigned SPAdd = 0; + uint64_t SPAdd = 0; // Check if we can move the stack update instruction up the epilogue // past the callee saves. This will allow the move to LR instruction @@ -1727,11 +1715,7 @@ BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) .addReg(FPReg).addImm(FrameSize); } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) - .addReg(ScratchReg, RegState::Kill) - .addImm(FrameSize & 0xFFFF); + TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize); BuildMI(MBB, MBBI, dl, AddInst) .addReg(RBReg) .addReg(FPReg) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -189,7 +189,7 @@ } /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { + inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } @@ -203,7 +203,7 @@ /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); - void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); + void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. @@ -640,7 +640,7 @@ && isInt32Immediate(N->getOperand(1).getNode(), Imm); } -void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { +void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) { SDLoc dl(SN); int FI = cast(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); @@ -5379,7 +5379,7 @@ // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } } @@ -5437,7 +5437,7 @@ int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -746,6 +746,12 @@ MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const; + // Materialize immediate after RA. + void materializeImmPostRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + int64_t Imm) const; + /// getRegNumForOperand - some operands use different numbering schemes /// for the same registers. For example, a VSX instruction may have any of /// vs0-vs63 allocated whereas an Altivec instruction could only have diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3234,6 +3234,47 @@ return nullptr; } +void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + int64_t Imm) const { + assert(!MBB.getParent()->getRegInfo().isSSA() && + "Register should be in non-SSA form after RA"); + bool isPPC64 = Subtarget.isPPC64(); + // FIXME: Materialization here is not optimal. + // For some special bit patterns we can use less instructions. + // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp. + if (isInt<16>(Imm)) { + BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm); + } else if (isInt<32>(Imm)) { + BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg) + .addImm(Imm >> 16); + if (Imm & 0xFFFF) + BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg) + .addReg(Reg, RegState::Kill) + .addImm(Imm & 0xFFFF); + } else { + assert(isPPC64 && "Materializing 64-bit immediate to single register is " + "only supported in PPC64"); + BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48); + if ((Imm >> 32) & 0xFFFF) + BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg) + .addReg(Reg, RegState::Kill) + .addImm((Imm >> 32) & 0xFFFF); + BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg) + .addReg(Reg, RegState::Kill) + .addImm(32) + .addImm(31); + BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg) + .addReg(Reg, RegState::Kill) + .addImm((Imm >> 16) & 0xFFFF); + if (Imm & 0xFFFF) + BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg) + .addReg(Reg, RegState::Kill) + .addImm(Imm & 0xFFFF); + } +} + MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineInstr &MI, unsigned &OpNoForForwarding, diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1477,7 +1477,7 @@ OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC); // Now add the frame object offset to the offset from r1. - int Offset = MFI.getObjectOffset(FrameIndex); + int64_t Offset = MFI.getObjectOffset(FrameIndex); Offset += MI.getOperand(OffsetOperandNo).getImm(); // If we're not using a Frame Pointer that has been set to the value of the @@ -1537,13 +1537,16 @@ // Insert a set of rA with the full offset value before the ld, st, or add if (isInt<16>(Offset)) BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg) - .addImm(Offset); - else { + .addImm(Offset); + else if (isInt<32>(Offset)) { BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi) - .addImm(Offset >> 16); + .addImm(Offset >> 16); BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) - .addReg(SRegHi, RegState::Kill) - .addImm(Offset); + .addReg(SRegHi, RegState::Kill) + .addImm(Offset); + } else { + assert(is64Bit && "Huge stack is only supported on PPC64"); + TII.materializeImmPostRA(MBB, II, dl, SReg, Offset); } // Convert into indexed form of the instruction: diff --git a/llvm/test/CodeGen/PowerPC/huge-frame-call.ll b/llvm/test/CodeGen/PowerPC/huge-frame-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/huge-frame-call.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ +; RUN: 2>&1 | FileCheck --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s \ +; RUN: 2>&1 | FileCheck --check-prefix=CHECK-BE %s + +%0 = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [8 x i8] }> +@global.1 = internal global %0 <{ i32 129, i32 2, i32 118, i32 0, i32 5, i32 0, i32 0, i32 0, i32 120, i32 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @global.2, i32 0, i32 0), [8 x i8] c"\00\00\00\00\00\00\00\03" }>, align 4 +@global.2 = internal constant [3 x i8] c"x.c" +@alias = dso_local alias i32 (), i32 ()* @main + +define dso_local signext i32 @main() nounwind { +; CHECK-LE-LABEL: main: +; CHECK-LE: # %bb.0: # %bb +; CHECK-LE-NEXT: mflr 0 +; CHECK-LE-NEXT: std 0, 16(1) +; CHECK-LE-NEXT: lis 0, -1 +; CHECK-LE-NEXT: ori 0, 0, 65535 +; CHECK-LE-NEXT: sldi 0, 0, 32 +; CHECK-LE-NEXT: oris 0, 0, 32767 +; CHECK-LE-NEXT: ori 0, 0, 65120 +; CHECK-LE-NEXT: stdux 1, 1, 0 +; CHECK-LE-NEXT: lis 3, 0 +; CHECK-LE-NEXT: sldi 3, 3, 32 +; CHECK-LE-NEXT: oris 3, 3, 32768 +; CHECK-LE-NEXT: ori 3, 3, 400 +; CHECK-LE-NEXT: stdx 30, 1, 3 # 8-byte Folded Spill +; CHECK-LE-NEXT: bl pluto +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: addis 3, 2, global.1@toc@ha +; CHECK-LE-NEXT: li 4, 0 +; CHECK-LE-NEXT: li 7, 0 +; CHECK-LE-NEXT: li 8, 0 +; CHECK-LE-NEXT: li 9, 0 +; CHECK-LE-NEXT: addi 5, 3, global.1@toc@l +; CHECK-LE-NEXT: ori 6, 4, 32768 +; CHECK-LE-NEXT: li 3, 6 +; CHECK-LE-NEXT: li 4, 257 +; CHECK-LE-NEXT: bl snork +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr 30, 3 +; CHECK-LE-NEXT: li 3, 344 +; CHECK-LE-NEXT: addi 4, 1, 48 +; CHECK-LE-NEXT: li 5, 8 +; CHECK-LE-NEXT: li 6, 8 +; CHECK-LE-NEXT: oris 3, 3, 32768 +; CHECK-LE-NEXT: add 4, 4, 3 +; CHECK-LE-NEXT: mr 3, 30 +; CHECK-LE-NEXT: bl zot +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr 3, 30 +; CHECK-LE-NEXT: bl wibble +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: li 3, 0 +; CHECK-LE-NEXT: bl snork.3 +; CHECK-LE-NEXT: nop +; +; CHECK-BE-LABEL: main: +; CHECK-BE: # %bb.0: # %bb +; CHECK-BE-NEXT: mflr 0 +; CHECK-BE-NEXT: std 0, 16(1) +; CHECK-BE-NEXT: lis 0, -1 +; CHECK-BE-NEXT: ori 0, 0, 65535 +; CHECK-BE-NEXT: sldi 0, 0, 32 +; CHECK-BE-NEXT: oris 0, 0, 32767 +; CHECK-BE-NEXT: ori 0, 0, 65056 +; CHECK-BE-NEXT: stdux 1, 1, 0 +; CHECK-BE-NEXT: lis 3, 0 +; CHECK-BE-NEXT: sldi 3, 3, 32 +; CHECK-BE-NEXT: oris 3, 3, 32768 +; CHECK-BE-NEXT: ori 3, 3, 472 +; CHECK-BE-NEXT: stdx 31, 1, 3 # 8-byte Folded Spill +; CHECK-BE-NEXT: bl .pluto[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: ld 5, L..C0(2) # @global.1 +; CHECK-BE-NEXT: li 3, 0 +; CHECK-BE-NEXT: ori 6, 3, 32768 +; CHECK-BE-NEXT: li 3, 6 +; CHECK-BE-NEXT: li 4, 257 +; CHECK-BE-NEXT: li 7, 0 +; CHECK-BE-NEXT: li 8, 0 +; CHECK-BE-NEXT: li 9, 0 +; CHECK-BE-NEXT: bl .snork[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr 31, 3 +; CHECK-BE-NEXT: li 3, 344 +; CHECK-BE-NEXT: oris 3, 3, 32768 +; CHECK-BE-NEXT: addi 4, 1, 120 +; CHECK-BE-NEXT: add 4, 4, 3 +; CHECK-BE-NEXT: mr 3, 31 +; CHECK-BE-NEXT: li 5, 8 +; CHECK-BE-NEXT: li 6, 8 +; CHECK-BE-NEXT: bl .zot[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr 3, 31 +; CHECK-BE-NEXT: bl .wibble[PR] +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: li 3, 0 +; CHECK-BE-NEXT: bl .snork.3[PR] +; CHECK-BE-NEXT: nop +bb: + %tmp = alloca [2147484000 x i8], align 8 + tail call void @pluto() + %tmp6 = tail call i64 @snork(i64 6, i32 257, %0* nonnull @global.1, i64 32768, i8* null, i64 0, i8* null) + %tmp7 = getelementptr inbounds [2147484000 x i8], [2147484000 x i8]* %tmp, i64 0, i64 2147483992 + %tmp8 = bitcast i8* %tmp7 to double* + %tmp9 = call i64 @zot(i64 %tmp6, double* nonnull %tmp8, i64 8, i64 8) + %tmp10 = call i64 @wibble(i64 %tmp6) + call void @snork.3(i64 0) + unreachable +} + +declare void @pluto() + +declare signext i64 @snork(i64, i32, %0*, i64, i8*, i64, i8*) + +declare signext i64 @zot(i64, double*, i64, i64) + +declare signext i64 @wibble(i64) + +declare void @snork.3(i64) diff --git a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll --- a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll +++ b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll @@ -1,17 +1,57 @@ -; REQUIRES: asserts -; RUN: not --crash llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ ; RUN: 2>&1 | FileCheck --check-prefix=CHECK-LE %s -; RUN: not --crash llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s \ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s \ ; RUN: 2>&1 | FileCheck --check-prefix=CHECK-BE %s declare void @bar(i8*) define void @foo(i8 %x) { -; CHECK-LE: Unhandled stack size -; CHECK-BE: Unhandled stack size +; CHECK-LE-LABEL: foo: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lis 0, -1 +; CHECK-LE-NEXT: ori 0, 0, 65534 +; CHECK-LE-NEXT: sldi 0, 0, 32 +; CHECK-LE-NEXT: oris 0, 0, 65535 +; CHECK-LE-NEXT: ori 0, 0, 65504 +; CHECK-LE-NEXT: stdux 1, 1, 0 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LE-NEXT: li 4, 1 +; CHECK-LE-NEXT: li 5, -1 +; CHECK-LE-NEXT: addi 6, 1, 32 +; CHECK-LE-NEXT: stb 3, 32(1) +; CHECK-LE-NEXT: rldic 4, 4, 31, 32 +; CHECK-LE-NEXT: rldic 5, 5, 0, 32 +; CHECK-LE-NEXT: stbx 3, 6, 4 +; CHECK-LE-NEXT: stbx 3, 6, 5 +; CHECK-LE-NEXT: ld 1, 0(1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: foo: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lis 0, -1 +; CHECK-BE-NEXT: ori 0, 0, 65534 +; CHECK-BE-NEXT: sldi 0, 0, 32 +; CHECK-BE-NEXT: oris 0, 0, 65535 +; CHECK-BE-NEXT: ori 0, 0, 65488 +; CHECK-BE-NEXT: stdux 1, 1, 0 +; CHECK-BE-NEXT: li 4, 1 +; CHECK-BE-NEXT: addi 5, 1, 48 +; CHECK-BE-NEXT: rldic 4, 4, 31, 32 +; CHECK-BE-NEXT: stb 3, 48(1) +; CHECK-BE-NEXT: stbx 3, 5, 4 +; CHECK-BE-NEXT: li 4, -1 +; CHECK-BE-NEXT: rldic 4, 4, 0, 32 +; CHECK-BE-NEXT: stbx 3, 5, 4 +; CHECK-BE-NEXT: ld 1, 0(1) +; CHECK-BE-NEXT: blr entry: %a = alloca i8, i64 4294967296, align 16 %b = getelementptr i8, i8* %a, i64 0 + %c = getelementptr i8, i8* %a, i64 2147483648 + %d = getelementptr i8, i8* %a, i64 4294967295 store volatile i8 %x, i8* %b + store volatile i8 %x, i8* %c + store volatile i8 %x, i8* %d ret void } diff --git a/llvm/test/CodeGen/PowerPC/huge-frame-unsupported.ll b/llvm/test/CodeGen/PowerPC/huge-frame-unsupported.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/huge-frame-unsupported.ll @@ -0,0 +1,14 @@ +; REQUIRES: asserts +; RUN: not --crash llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown < %s \ +; RUN: 2>&1 | FileCheck %s + +declare void @bar(i8*) + +define void @foo(i8 %x) { +; CHECK: Unhandled stack size +entry: + %a = alloca i8, i64 4294967296, align 16 + %b = getelementptr i8, i8* %a, i64 0 + store volatile i8 %x, i8* %b + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll @@ -2287,13 +2287,12 @@ ; BE-P10-LABEL: aligned: ; BE-P10: # %bb.0: # %entry ; BE-P10-NEXT: mflr r0 -; BE-P10-NEXT: lis r12, -1 ; BE-P10-NEXT: std r30, -16(r1) +; BE-P10-NEXT: lis r12, -1 ; BE-P10-NEXT: mr r30, r1 ; BE-P10-NEXT: std r0, 16(r1) ; BE-P10-NEXT: hashst r0, -24(r1) ; BE-P10-NEXT: clrldi r0, r1, 49 -; BE-P10-NEXT: ori r12, r12, 0 ; BE-P10-NEXT: subc r0, r12, r0 ; BE-P10-NEXT: stdux r1, r1, r0 ; BE-P10-NEXT: std r31, -8(r30) # 8-byte Folded Spill @@ -2331,10 +2330,9 @@ ; BE-P9-LABEL: aligned: ; BE-P9: # %bb.0: # %entry ; BE-P9-NEXT: mflr r0 -; BE-P9-NEXT: lis r12, -1 ; BE-P9-NEXT: std r30, -16(r1) +; BE-P9-NEXT: lis r12, -1 ; BE-P9-NEXT: mr r30, r1 -; BE-P9-NEXT: ori r12, r12, 0 ; BE-P9-NEXT: std r0, 16(r1) ; BE-P9-NEXT: hashst r0, -24(r1) ; BE-P9-NEXT: clrldi r0, r1, 49 @@ -2375,13 +2373,12 @@ ; BE-P8-LABEL: aligned: ; BE-P8: # %bb.0: # %entry ; BE-P8-NEXT: mflr r0 -; BE-P8-NEXT: lis r12, -1 ; BE-P8-NEXT: std r30, -16(r1) +; BE-P8-NEXT: lis r12, -1 ; BE-P8-NEXT: mr r30, r1 ; BE-P8-NEXT: std r0, 16(r1) ; BE-P8-NEXT: hashst r0, -24(r1) ; BE-P8-NEXT: clrldi r0, r1, 49 -; BE-P8-NEXT: ori r12, r12, 0 ; BE-P8-NEXT: subc r0, r12, r0 ; BE-P8-NEXT: stdux r1, r1, r0 ; BE-P8-NEXT: std r31, -8(r30) # 8-byte Folded Spill @@ -2419,13 +2416,12 @@ ; BE-32BIT-P10-LABEL: aligned: ; BE-32BIT-P10: # %bb.0: # %entry ; BE-32BIT-P10-NEXT: mflr r0 -; BE-32BIT-P10-NEXT: lis r12, -1 ; BE-32BIT-P10-NEXT: stw r30, -8(r1) +; BE-32BIT-P10-NEXT: lis r12, -1 ; BE-32BIT-P10-NEXT: mr r30, r1 ; BE-32BIT-P10-NEXT: stw r0, 8(r1) ; BE-32BIT-P10-NEXT: hashst r0, -16(r1) ; BE-32BIT-P10-NEXT: clrlwi r0, r1, 17 -; BE-32BIT-P10-NEXT: ori r12, r12, 0 ; BE-32BIT-P10-NEXT: subc r0, r12, r0 ; BE-32BIT-P10-NEXT: stwux r1, r1, r0 ; BE-32BIT-P10-NEXT: stw r31, -4(r30) # 4-byte Folded Spill @@ -2462,10 +2458,9 @@ ; BE-32BIT-P9-LABEL: aligned: ; BE-32BIT-P9: # %bb.0: # %entry ; BE-32BIT-P9-NEXT: mflr r0 -; BE-32BIT-P9-NEXT: lis r12, -1 ; BE-32BIT-P9-NEXT: stw r30, -8(r1) +; BE-32BIT-P9-NEXT: lis r12, -1 ; BE-32BIT-P9-NEXT: mr r30, r1 -; BE-32BIT-P9-NEXT: ori r12, r12, 0 ; BE-32BIT-P9-NEXT: stw r0, 8(r1) ; BE-32BIT-P9-NEXT: hashst r0, -16(r1) ; BE-32BIT-P9-NEXT: clrlwi r0, r1, 17 @@ -2505,13 +2500,12 @@ ; BE-32BIT-P8-LABEL: aligned: ; BE-32BIT-P8: # %bb.0: # %entry ; BE-32BIT-P8-NEXT: mflr r0 -; BE-32BIT-P8-NEXT: lis r12, -1 ; BE-32BIT-P8-NEXT: stw r30, -8(r1) +; BE-32BIT-P8-NEXT: lis r12, -1 ; BE-32BIT-P8-NEXT: mr r30, r1 ; BE-32BIT-P8-NEXT: stw r0, 8(r1) ; BE-32BIT-P8-NEXT: hashst r0, -16(r1) ; BE-32BIT-P8-NEXT: clrlwi r0, r1, 17 -; BE-32BIT-P8-NEXT: ori r12, r12, 0 ; BE-32BIT-P8-NEXT: subc r0, r12, r0 ; BE-32BIT-P8-NEXT: stwux r1, r1, r0 ; BE-32BIT-P8-NEXT: stw r31, -4(r30) # 4-byte Folded Spill @@ -2548,13 +2542,12 @@ ; BE-P10-PRIV-LABEL: aligned: ; BE-P10-PRIV: # %bb.0: # %entry ; BE-P10-PRIV-NEXT: mflr r0 -; BE-P10-PRIV-NEXT: lis r12, -1 ; BE-P10-PRIV-NEXT: std r30, -16(r1) +; BE-P10-PRIV-NEXT: lis r12, -1 ; BE-P10-PRIV-NEXT: mr r30, r1 ; BE-P10-PRIV-NEXT: std r0, 16(r1) ; BE-P10-PRIV-NEXT: hashstp r0, -24(r1) ; BE-P10-PRIV-NEXT: clrldi r0, r1, 49 -; BE-P10-PRIV-NEXT: ori r12, r12, 0 ; BE-P10-PRIV-NEXT: subc r0, r12, r0 ; BE-P10-PRIV-NEXT: stdux r1, r1, r0 ; BE-P10-PRIV-NEXT: std r31, -8(r30) # 8-byte Folded Spill @@ -2592,10 +2585,9 @@ ; BE-P9-PRIV-LABEL: aligned: ; BE-P9-PRIV: # %bb.0: # %entry ; BE-P9-PRIV-NEXT: mflr r0 -; BE-P9-PRIV-NEXT: lis r12, -1 ; BE-P9-PRIV-NEXT: std r30, -16(r1) +; BE-P9-PRIV-NEXT: lis r12, -1 ; BE-P9-PRIV-NEXT: mr r30, r1 -; BE-P9-PRIV-NEXT: ori r12, r12, 0 ; BE-P9-PRIV-NEXT: std r0, 16(r1) ; BE-P9-PRIV-NEXT: hashstp r0, -24(r1) ; BE-P9-PRIV-NEXT: clrldi r0, r1, 49 @@ -2636,13 +2628,12 @@ ; BE-P8-PRIV-LABEL: aligned: ; BE-P8-PRIV: # %bb.0: # %entry ; BE-P8-PRIV-NEXT: mflr r0 -; BE-P8-PRIV-NEXT: lis r12, -1 ; BE-P8-PRIV-NEXT: std r30, -16(r1) +; BE-P8-PRIV-NEXT: lis r12, -1 ; BE-P8-PRIV-NEXT: mr r30, r1 ; BE-P8-PRIV-NEXT: std r0, 16(r1) ; BE-P8-PRIV-NEXT: hashstp r0, -24(r1) ; BE-P8-PRIV-NEXT: clrldi r0, r1, 49 -; BE-P8-PRIV-NEXT: ori r12, r12, 0 ; BE-P8-PRIV-NEXT: subc r0, r12, r0 ; BE-P8-PRIV-NEXT: stdux r1, r1, r0 ; BE-P8-PRIV-NEXT: std r31, -8(r30) # 8-byte Folded Spill @@ -2680,13 +2671,12 @@ ; BE-32BIT-P10-PRIV-LABEL: aligned: ; BE-32BIT-P10-PRIV: # %bb.0: # %entry ; BE-32BIT-P10-PRIV-NEXT: mflr r0 -; BE-32BIT-P10-PRIV-NEXT: lis r12, -1 ; BE-32BIT-P10-PRIV-NEXT: stw r30, -8(r1) +; BE-32BIT-P10-PRIV-NEXT: lis r12, -1 ; BE-32BIT-P10-PRIV-NEXT: mr r30, r1 ; BE-32BIT-P10-PRIV-NEXT: stw r0, 8(r1) ; BE-32BIT-P10-PRIV-NEXT: hashstp r0, -16(r1) ; BE-32BIT-P10-PRIV-NEXT: clrlwi r0, r1, 17 -; BE-32BIT-P10-PRIV-NEXT: ori r12, r12, 0 ; BE-32BIT-P10-PRIV-NEXT: subc r0, r12, r0 ; BE-32BIT-P10-PRIV-NEXT: stwux r1, r1, r0 ; BE-32BIT-P10-PRIV-NEXT: stw r31, -4(r30) # 4-byte Folded Spill @@ -2723,10 +2713,9 @@ ; BE-32BIT-P9-PRIV-LABEL: aligned: ; BE-32BIT-P9-PRIV: # %bb.0: # %entry ; BE-32BIT-P9-PRIV-NEXT: mflr r0 -; BE-32BIT-P9-PRIV-NEXT: lis r12, -1 ; BE-32BIT-P9-PRIV-NEXT: stw r30, -8(r1) +; BE-32BIT-P9-PRIV-NEXT: lis r12, -1 ; BE-32BIT-P9-PRIV-NEXT: mr r30, r1 -; BE-32BIT-P9-PRIV-NEXT: ori r12, r12, 0 ; BE-32BIT-P9-PRIV-NEXT: stw r0, 8(r1) ; BE-32BIT-P9-PRIV-NEXT: hashstp r0, -16(r1) ; BE-32BIT-P9-PRIV-NEXT: clrlwi r0, r1, 17 @@ -2766,13 +2755,12 @@ ; BE-32BIT-P8-PRIV-LABEL: aligned: ; BE-32BIT-P8-PRIV: # %bb.0: # %entry ; BE-32BIT-P8-PRIV-NEXT: mflr r0 -; BE-32BIT-P8-PRIV-NEXT: lis r12, -1 ; BE-32BIT-P8-PRIV-NEXT: stw r30, -8(r1) +; BE-32BIT-P8-PRIV-NEXT: lis r12, -1 ; BE-32BIT-P8-PRIV-NEXT: mr r30, r1 ; BE-32BIT-P8-PRIV-NEXT: stw r0, 8(r1) ; BE-32BIT-P8-PRIV-NEXT: hashstp r0, -16(r1) ; BE-32BIT-P8-PRIV-NEXT: clrlwi r0, r1, 17 -; BE-32BIT-P8-PRIV-NEXT: ori r12, r12, 0 ; BE-32BIT-P8-PRIV-NEXT: subc r0, r12, r0 ; BE-32BIT-P8-PRIV-NEXT: stwux r1, r1, r0 ; BE-32BIT-P8-PRIV-NEXT: stw r31, -4(r30) # 4-byte Folded Spill diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -3410,13 +3410,12 @@ ; LE-P10-LABEL: aligned: ; LE-P10: # %bb.0: # %entry ; LE-P10-NEXT: mflr r0 -; LE-P10-NEXT: lis r12, -1 ; LE-P10-NEXT: std r30, -16(r1) +; LE-P10-NEXT: lis r12, -1 ; LE-P10-NEXT: mr r30, r1 ; LE-P10-NEXT: std r0, 16(r1) ; LE-P10-NEXT: hashst r0, -32(r1) ; LE-P10-NEXT: clrldi r0, r1, 49 -; LE-P10-NEXT: ori r12, r12, 0 ; LE-P10-NEXT: subc r0, r12, r0 ; LE-P10-NEXT: stdux r1, r1, r0 ; LE-P10-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3453,10 +3452,9 @@ ; LE-P9-LABEL: aligned: ; LE-P9: # %bb.0: # %entry ; LE-P9-NEXT: mflr r0 -; LE-P9-NEXT: lis r12, -1 ; LE-P9-NEXT: std r30, -16(r1) +; LE-P9-NEXT: lis r12, -1 ; LE-P9-NEXT: mr r30, r1 -; LE-P9-NEXT: ori r12, r12, 0 ; LE-P9-NEXT: std r0, 16(r1) ; LE-P9-NEXT: hashst r0, -32(r1) ; LE-P9-NEXT: clrldi r0, r1, 49 @@ -3497,13 +3495,12 @@ ; LE-P8-LABEL: aligned: ; LE-P8: # %bb.0: # %entry ; LE-P8-NEXT: mflr r0 -; LE-P8-NEXT: lis r12, -1 ; LE-P8-NEXT: std r30, -16(r1) +; LE-P8-NEXT: lis r12, -1 ; LE-P8-NEXT: mr r30, r1 ; LE-P8-NEXT: std r0, 16(r1) ; LE-P8-NEXT: hashst r0, -32(r1) ; LE-P8-NEXT: clrldi r0, r1, 49 -; LE-P8-NEXT: ori r12, r12, 0 ; LE-P8-NEXT: subc r0, r12, r0 ; LE-P8-NEXT: stdux r1, r1, r0 ; LE-P8-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3547,7 +3544,6 @@ ; LE-P10-O0-NEXT: mr r30, r1 ; LE-P10-O0-NEXT: clrldi r0, r1, 49 ; LE-P10-O0-NEXT: lis r12, -1 -; LE-P10-O0-NEXT: ori r12, r12, 0 ; LE-P10-O0-NEXT: subc r0, r12, r0 ; LE-P10-O0-NEXT: stdux r1, r1, r0 ; LE-P10-O0-NEXT: std r3, 32752(r1) # 8-byte Folded Spill @@ -3591,7 +3587,6 @@ ; LE-P9-O0-NEXT: mr r30, r1 ; LE-P9-O0-NEXT: clrldi r0, r1, 49 ; LE-P9-O0-NEXT: lis r12, -1 -; LE-P9-O0-NEXT: ori r12, r12, 0 ; LE-P9-O0-NEXT: subc r0, r12, r0 ; LE-P9-O0-NEXT: stdux r1, r1, r0 ; LE-P9-O0-NEXT: std r3, 32752(r1) # 8-byte Folded Spill @@ -3635,7 +3630,6 @@ ; LE-P8-O0-NEXT: mr r30, r1 ; LE-P8-O0-NEXT: clrldi r0, r1, 49 ; LE-P8-O0-NEXT: lis r12, -1 -; LE-P8-O0-NEXT: ori r12, r12, 0 ; LE-P8-O0-NEXT: subc r0, r12, r0 ; LE-P8-O0-NEXT: stdux r1, r1, r0 ; LE-P8-O0-NEXT: std r3, 32752(r1) # 8-byte Folded Spill @@ -3673,13 +3667,12 @@ ; BE-P10-LABEL: aligned: ; BE-P10: # %bb.0: # %entry ; BE-P10-NEXT: mflr r0 -; BE-P10-NEXT: lis r12, -1 ; BE-P10-NEXT: std r30, -16(r1) +; BE-P10-NEXT: lis r12, -1 ; BE-P10-NEXT: mr r30, r1 ; BE-P10-NEXT: std r0, 16(r1) ; BE-P10-NEXT: hashst r0, -32(r1) ; BE-P10-NEXT: clrldi r0, r1, 49 -; BE-P10-NEXT: ori r12, r12, 0 ; BE-P10-NEXT: subc r0, r12, r0 ; BE-P10-NEXT: stdux r1, r1, r0 ; BE-P10-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3717,10 +3710,9 @@ ; BE-P9-LABEL: aligned: ; BE-P9: # %bb.0: # %entry ; BE-P9-NEXT: mflr r0 -; BE-P9-NEXT: lis r12, -1 ; BE-P9-NEXT: std r30, -16(r1) +; BE-P9-NEXT: lis r12, -1 ; BE-P9-NEXT: mr r30, r1 -; BE-P9-NEXT: ori r12, r12, 0 ; BE-P9-NEXT: std r0, 16(r1) ; BE-P9-NEXT: hashst r0, -32(r1) ; BE-P9-NEXT: clrldi r0, r1, 49 @@ -3761,13 +3753,12 @@ ; BE-P8-LABEL: aligned: ; BE-P8: # %bb.0: # %entry ; BE-P8-NEXT: mflr r0 -; BE-P8-NEXT: lis r12, -1 ; BE-P8-NEXT: std r30, -16(r1) +; BE-P8-NEXT: lis r12, -1 ; BE-P8-NEXT: mr r30, r1 ; BE-P8-NEXT: std r0, 16(r1) ; BE-P8-NEXT: hashst r0, -32(r1) ; BE-P8-NEXT: clrldi r0, r1, 49 -; BE-P8-NEXT: ori r12, r12, 0 ; BE-P8-NEXT: subc r0, r12, r0 ; BE-P8-NEXT: stdux r1, r1, r0 ; BE-P8-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3809,7 +3800,6 @@ ; BE-32BIT-P10-NEXT: stw r0, 4(r1) ; BE-32BIT-P10-NEXT: hashst r0, -24(r1) ; BE-32BIT-P10-NEXT: clrlwi r0, r1, 17 -; BE-32BIT-P10-NEXT: ori r12, r12, 0 ; BE-32BIT-P10-NEXT: subc r0, r12, r0 ; BE-32BIT-P10-NEXT: stwux r1, r1, r0 ; BE-32BIT-P10-NEXT: sub r0, r1, r0 @@ -3853,7 +3843,6 @@ ; BE-32BIT-P9: # %bb.0: # %entry ; BE-32BIT-P9-NEXT: mflr r0 ; BE-32BIT-P9-NEXT: lis r12, -1 -; BE-32BIT-P9-NEXT: ori r12, r12, 0 ; BE-32BIT-P9-NEXT: stw r0, 4(r1) ; BE-32BIT-P9-NEXT: hashst r0, -24(r1) ; BE-32BIT-P9-NEXT: clrlwi r0, r1, 17 @@ -3903,7 +3892,6 @@ ; BE-32BIT-P8-NEXT: stw r0, 4(r1) ; BE-32BIT-P8-NEXT: hashst r0, -24(r1) ; BE-32BIT-P8-NEXT: clrlwi r0, r1, 17 -; BE-32BIT-P8-NEXT: ori r12, r12, 0 ; BE-32BIT-P8-NEXT: subc r0, r12, r0 ; BE-32BIT-P8-NEXT: stwux r1, r1, r0 ; BE-32BIT-P8-NEXT: sub r0, r1, r0 @@ -3946,13 +3934,12 @@ ; LE-P10-PRIV-LABEL: aligned: ; LE-P10-PRIV: # %bb.0: # %entry ; LE-P10-PRIV-NEXT: mflr r0 -; LE-P10-PRIV-NEXT: lis r12, -1 ; LE-P10-PRIV-NEXT: std r30, -16(r1) +; LE-P10-PRIV-NEXT: lis r12, -1 ; LE-P10-PRIV-NEXT: mr r30, r1 ; LE-P10-PRIV-NEXT: std r0, 16(r1) ; LE-P10-PRIV-NEXT: hashstp r0, -32(r1) ; LE-P10-PRIV-NEXT: clrldi r0, r1, 49 -; LE-P10-PRIV-NEXT: ori r12, r12, 0 ; LE-P10-PRIV-NEXT: subc r0, r12, r0 ; LE-P10-PRIV-NEXT: stdux r1, r1, r0 ; LE-P10-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -3989,10 +3976,9 @@ ; LE-P9-PRIV-LABEL: aligned: ; LE-P9-PRIV: # %bb.0: # %entry ; LE-P9-PRIV-NEXT: mflr r0 -; LE-P9-PRIV-NEXT: lis r12, -1 ; LE-P9-PRIV-NEXT: std r30, -16(r1) +; LE-P9-PRIV-NEXT: lis r12, -1 ; LE-P9-PRIV-NEXT: mr r30, r1 -; LE-P9-PRIV-NEXT: ori r12, r12, 0 ; LE-P9-PRIV-NEXT: std r0, 16(r1) ; LE-P9-PRIV-NEXT: hashstp r0, -32(r1) ; LE-P9-PRIV-NEXT: clrldi r0, r1, 49 @@ -4033,13 +4019,12 @@ ; LE-P8-PRIV-LABEL: aligned: ; LE-P8-PRIV: # %bb.0: # %entry ; LE-P8-PRIV-NEXT: mflr r0 -; LE-P8-PRIV-NEXT: lis r12, -1 ; LE-P8-PRIV-NEXT: std r30, -16(r1) +; LE-P8-PRIV-NEXT: lis r12, -1 ; LE-P8-PRIV-NEXT: mr r30, r1 ; LE-P8-PRIV-NEXT: std r0, 16(r1) ; LE-P8-PRIV-NEXT: hashstp r0, -32(r1) ; LE-P8-PRIV-NEXT: clrldi r0, r1, 49 -; LE-P8-PRIV-NEXT: ori r12, r12, 0 ; LE-P8-PRIV-NEXT: subc r0, r12, r0 ; LE-P8-PRIV-NEXT: stdux r1, r1, r0 ; LE-P8-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -4077,13 +4062,12 @@ ; BE-P10-PRIV-LABEL: aligned: ; BE-P10-PRIV: # %bb.0: # %entry ; BE-P10-PRIV-NEXT: mflr r0 -; BE-P10-PRIV-NEXT: lis r12, -1 ; BE-P10-PRIV-NEXT: std r30, -16(r1) +; BE-P10-PRIV-NEXT: lis r12, -1 ; BE-P10-PRIV-NEXT: mr r30, r1 ; BE-P10-PRIV-NEXT: std r0, 16(r1) ; BE-P10-PRIV-NEXT: hashstp r0, -32(r1) ; BE-P10-PRIV-NEXT: clrldi r0, r1, 49 -; BE-P10-PRIV-NEXT: ori r12, r12, 0 ; BE-P10-PRIV-NEXT: subc r0, r12, r0 ; BE-P10-PRIV-NEXT: stdux r1, r1, r0 ; BE-P10-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill @@ -4121,10 +4105,9 @@ ; BE-P9-PRIV-LABEL: aligned: ; BE-P9-PRIV: # %bb.0: # %entry ; BE-P9-PRIV-NEXT: mflr r0 -; BE-P9-PRIV-NEXT: lis r12, -1 ; BE-P9-PRIV-NEXT: std r30, -16(r1) +; BE-P9-PRIV-NEXT: lis r12, -1 ; BE-P9-PRIV-NEXT: mr r30, r1 -; BE-P9-PRIV-NEXT: ori r12, r12, 0 ; BE-P9-PRIV-NEXT: std r0, 16(r1) ; BE-P9-PRIV-NEXT: hashstp r0, -32(r1) ; BE-P9-PRIV-NEXT: clrldi r0, r1, 49 @@ -4165,13 +4148,12 @@ ; BE-P8-PRIV-LABEL: aligned: ; BE-P8-PRIV: # %bb.0: # %entry ; BE-P8-PRIV-NEXT: mflr r0 -; BE-P8-PRIV-NEXT: lis r12, -1 ; BE-P8-PRIV-NEXT: std r30, -16(r1) +; BE-P8-PRIV-NEXT: lis r12, -1 ; BE-P8-PRIV-NEXT: mr r30, r1 ; BE-P8-PRIV-NEXT: std r0, 16(r1) ; BE-P8-PRIV-NEXT: hashstp r0, -32(r1) ; BE-P8-PRIV-NEXT: clrldi r0, r1, 49 -; BE-P8-PRIV-NEXT: ori r12, r12, 0 ; BE-P8-PRIV-NEXT: subc r0, r12, r0 ; BE-P8-PRIV-NEXT: stdux r1, r1, r0 ; BE-P8-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill