diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -616,6 +616,10 @@ // AIX assembler does not support cfi directives. const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); + // On Power10, mflr has a 2-cycle latency, but on other targets it is more + // expensive. Account for that latency when backing up LR in the prologue. + const bool IsCheapMFLR = Subtarget.isISA3_1(); + // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. @@ -837,10 +841,11 @@ // Generate the instruction to store the LR. In the case where ROP protection // is required the register holding the LR should not be killed as it will be // used by the hash store instruction. - if (MustSaveLR) { + auto SaveLR = [&](int64_t Offset) { + assert(MustSaveLR && "LR is not required to be saved!"); BuildMI(MBB, StackUpdateLoc, dl, StoreInst) .addReg(ScratchReg, getKillRegState(!HasROPProtect)) - .addImm(LROffset) + .addImm(Offset) .addReg(SPReg); // Add the ROP protection Hash Store instruction. @@ -861,7 +866,10 @@ .addImm(ImmOffset) .addReg(SPReg); } - } + }; + + if (MustSaveLR && IsCheapMFLR) + SaveLR(LROffset); if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { @@ -873,8 +881,11 @@ } // Skip the rest if this is a leaf function & all spills fit in the Red Zone. - if (!FrameSize) + if (!FrameSize) { + if (MustSaveLR && !IsCheapMFLR) + SaveLR(LROffset); return; + } // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now @@ -888,7 +899,15 @@ // Have we generated a STUX instruction to claim stack frame? If so, // the negated frame size will be placed in ScratchReg. 
- bool HasSTUX = false; + bool HasSTUX = + (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) || + (HasBP && MaxAlign > 1) || isLargeFrame; + + // If we use STUX to update the stack pointer, we need both scratch registers + // (TempReg and ScratchReg), so LR, which is currently held in ScratchReg, + // must be saved here first. + if (HasSTUX && MustSaveLR && !IsCheapMFLR) + SaveLR(LROffset); // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain // pointer is always stored at SP, we will get a free probe due to an essential @@ -909,7 +928,6 @@ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) .addReg(ScratchReg) .addReg(SPReg); - HasSTUX = true; } } else { // This condition must be kept in sync with canUseAsPrologue. @@ -941,21 +959,17 @@ .addReg(SPReg, RegState::Kill) .addReg(SPReg) .addReg(ScratchReg); - HasSTUX = true; - } else if (!isLargeFrame) { BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); - } else { TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize); BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) .addReg(SPReg, RegState::Kill) .addReg(SPReg) .addReg(ScratchReg); - HasSTUX = true; } } @@ -1082,6 +1096,10 @@ } } + // Save the LR now. + if (!HasSTUX && MustSaveLR && !IsCheapMFLR) + SaveLR(LROffset + FrameSize); + // Add Call Frame Information for the instructions we generated above. 
if (needsCFI) { unsigned CFIIndex; diff --git a/llvm/test/CodeGen/PowerPC/pr33547.ll b/llvm/test/CodeGen/PowerPC/pr33547.ll --- a/llvm/test/CodeGen/PowerPC/pr33547.ll +++ b/llvm/test/CodeGen/PowerPC/pr33547.ll @@ -11,8 +11,8 @@ ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %L.entry ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -32(1) +; CHECK-NEXT: std 0, 48(1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: addis 3, 2, .LC0@toc@ha @@ -47,8 +47,8 @@ ; CHECK-LABEL: testFunc: ; CHECK: # %bb.0: # %L.entry ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -32(1) +; CHECK-NEXT: std 0, 48(1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl .L2$pb diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll --- a/llvm/test/CodeGen/PowerPC/pr36292.ll +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: mflr 0 ; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill -; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -64(1) +; CHECK-NEXT: std 0, 80(1) ; CHECK-NEXT: ld 29, 0(3) ; CHECK-NEXT: ld 30, 32(1) ; CHECK-NEXT: cmpld 30, 29 diff --git a/llvm/test/CodeGen/PowerPC/pr41088.ll b/llvm/test/CodeGen/PowerPC/pr41088.ll --- a/llvm/test/CodeGen/PowerPC/pr41088.ll +++ b/llvm/test/CodeGen/PowerPC/pr41088.ll @@ -33,17 +33,10 @@ define void @test(ptr %arg, ptr %arg1, ptr %arg2) unnamed_addr personality ptr @personality { ; CHECK-LABEL: test: -; CHECK: .cfi_personality 148, DW.ref.personality -; CHECK-NEXT: .cfi_lsda 20, .Lexception0 -; CHECK-NEXT: .Lfunc_gep0: -; CHECK-NEXT: addis r2, r12, .TOC.-.Lfunc_gep0@ha -; CHECK-NEXT: addi r2, r2, .TOC.-.Lfunc_gep0@l -; CHECK-NEXT: .Lfunc_lep0: -; CHECK-NEXT: .localentry test, .Lfunc_lep0-.Lfunc_gep0 -; CHECK-NEXT: # %bb.0: # %bb +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -32(r1) 
+; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: li r4, 0 diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll --- a/llvm/test/CodeGen/PowerPC/pr43527.ll +++ b/llvm/test/CodeGen/PowerPC/pr43527.ll @@ -16,10 +16,10 @@ ; CHECK-NEXT: .cfi_offset r30, -16 ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: sub r30, r4, r3 ; CHECK-NEXT: li r29, -4 +; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_3: # %bb5 ; CHECK-NEXT: # diff --git a/llvm/test/CodeGen/PowerPC/pr43976.ll b/llvm/test/CodeGen/PowerPC/pr43976.ll --- a/llvm/test/CodeGen/PowerPC/pr43976.ll +++ b/llvm/test/CodeGen/PowerPC/pr43976.ll @@ -7,8 +7,8 @@ ; CHECK-LABEL: b: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -144(r1) +; CHECK-NEXT: std r0, 160(r1) ; CHECK-NEXT: addis r3, r2, a@toc@ha ; CHECK-NEXT: li r4, 1 ; CHECK-NEXT: lfd f0, a@toc@l(r3) diff --git a/llvm/test/CodeGen/PowerPC/pr44183.ll b/llvm/test/CodeGen/PowerPC/pr44183.ll --- a/llvm/test/CodeGen/PowerPC/pr44183.ll +++ b/llvm/test/CodeGen/PowerPC/pr44183.ll @@ -11,14 +11,14 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: mr r30, r3 ; CHECK-NEXT: ld r3, 8(r3) ; CHECK-NEXT: lwz r4, 36(r30) ; CHECK-NEXT: rldicl r3, r3, 60, 4 -; CHECK-NEXT: slwi r3, r3, 31 ; CHECK-NEXT: clrlwi r4, r4, 31 +; CHECK-NEXT: slwi r3, r3, 31 ; CHECK-NEXT: rlwimi r4, r3, 0, 0, 0 ; CHECK-NEXT: bl _ZN1llsE1d ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/PowerPC/pr45301.ll b/llvm/test/CodeGen/PowerPC/pr45301.ll --- a/llvm/test/CodeGen/PowerPC/pr45301.ll +++ b/llvm/test/CodeGen/PowerPC/pr45301.ll @@ -7,8 +7,8 @@ ; 
CHECK-LABEL: g: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) ; CHECK-NEXT: bl i ; CHECK-NEXT: nop ; CHECK-NEXT: addis r4, r2, g@toc@ha diff --git a/llvm/test/CodeGen/PowerPC/pr45432.ll b/llvm/test/CodeGen/PowerPC/pr45432.ll --- a/llvm/test/CodeGen/PowerPC/pr45432.ll +++ b/llvm/test/CodeGen/PowerPC/pr45432.ll @@ -11,9 +11,9 @@ ; CHECK-LABEL: h: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -64(1) ; CHECK-NEXT: addis 3, 2, g@toc@ha +; CHECK-NEXT: std 0, 80(1) ; CHECK-NEXT: std 30, 48(1) # 8-byte Folded Spill ; CHECK-NEXT: lwz 3, g@toc@l(3) ; CHECK-NEXT: extswsli 30, 3, 2 diff --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll --- a/llvm/test/CodeGen/PowerPC/pr47373.ll +++ b/llvm/test/CodeGen/PowerPC/pr47373.ll @@ -8,11 +8,11 @@ ; CHECK-LABEL: d: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -208(r1) ; CHECK-NEXT: addis r3, r2, .LC0@toc@ha -; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 224(r1) ; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: ld r29, 0(r3) ; CHECK-NEXT: bl c diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -10,10 +10,10 @@ ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -48(r1) ; CHECK-NEXT: li r3, 1 ; CHECK-NEXT: li r30, 0 +; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb3 ; CHECK-NEXT: # @@ -83,9 +83,9 @@ ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: std 
r0, 16(r1) ; CHECK-NEXT: stdu r1, -48(r1) ; CHECK-NEXT: li r30, 3 +; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB1_1: # %bb1 ; CHECK-NEXT: # @@ -148,12 +148,12 @@ ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mfocrf r12, 32 ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) ; CHECK-NEXT: stdu r1, -48(r1) -; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: std r30, 32(r1) # 8-byte Folded Spill ; CHECK-NEXT: # implicit-def: $x30 +; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: cmpdi r3, 0 ; CHECK-NEXT: crnot 4*cr2+lt, eq ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_3 @@ -261,11 +261,11 @@ ; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: fmr f31, f1 ; CHECK-NEXT: li r30, 0 ; CHECK-NEXT: li r29, 0 +; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_1: # %bb1 ; CHECK-NEXT: # diff --git a/llvm/test/CodeGen/PowerPC/pr48527.ll b/llvm/test/CodeGen/PowerPC/pr48527.ll --- a/llvm/test/CodeGen/PowerPC/pr48527.ll +++ b/llvm/test/CodeGen/PowerPC/pr48527.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: mflr 0 ; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill -; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -64(1) +; CHECK-NEXT: std 0, 80(1) ; CHECK-NEXT: lwz 30, 0(3) ; CHECK-NEXT: addis 3, 2, .LC0@toc@ha ; CHECK-NEXT: ld 29, .LC0@toc@l(3) diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll --- a/llvm/test/CodeGen/PowerPC/pr49092.ll +++ b/llvm/test/CodeGen/PowerPC/pr49092.ll @@ -9,9 +9,9 @@ ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: addi r3, r3, 11 ; CHECK-NEXT: clrlwi r3, r3, 16 ; 
CHECK-NEXT: bl __gnu_h2f_ieee diff --git a/llvm/test/CodeGen/PowerPC/pr55463.ll b/llvm/test/CodeGen/PowerPC/pr55463.ll --- a/llvm/test/CodeGen/PowerPC/pr55463.ll +++ b/llvm/test/CodeGen/PowerPC/pr55463.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: baz: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: # implicit-def: $r3 +; CHECK-NEXT: stw 0, 20(1) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb1 ; CHECK-NEXT: # @@ -42,12 +42,12 @@ ; CHECK-LABEL: wombat: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) ; CHECK-NEXT: stwu 1, -48(1) ; CHECK-NEXT: li 3, .LCPI1_0@l ; CHECK-NEXT: li 5, .LCPI1_1@l ; CHECK-NEXT: lis 4, .LCPI1_0@ha ; CHECK-NEXT: lis 6, .LCPI1_1@ha +; CHECK-NEXT: stw 0, 52(1) ; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill ; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill ; CHECK-NEXT: evlddx 30, 4, 3 diff --git a/llvm/test/CodeGen/PowerPC/pr56469.ll b/llvm/test/CodeGen/PowerPC/pr56469.ll --- a/llvm/test/CodeGen/PowerPC/pr56469.ll +++ b/llvm/test/CodeGen/PowerPC/pr56469.ll @@ -7,7 +7,6 @@ ; CHECK-LABEL: callee: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: lfs 1, 24(1) ; CHECK-NEXT: lis 3, .L.str@ha @@ -16,6 +15,7 @@ ; CHECK-NEXT: lfs 3, 32(1) ; CHECK-NEXT: creqv 6, 6, 6 ; CHECK-NEXT: lfs 4, 36(1) +; CHECK-NEXT: stw 0, 20(1) ; CHECK-NEXT: bl printf ; CHECK-NEXT: lwz 0, 20(1) ; CHECK-NEXT: addi 1, 1, 16