Index: lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- lib/Target/PowerPC/PPCMIPeephole.cpp +++ lib/Target/PowerPC/PPCMIPeephole.cpp @@ -29,13 +29,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" #include "MCTargetDesc/PPCPredicates.h" using namespace llvm; #define DEBUG_TYPE "ppc-mi-peepholes" +STATISTIC(RemoveTOCSave, "Number of TOC saves removed"); STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); @@ -78,6 +78,7 @@ // Perform peepholes. bool eliminateRedundantCompare(void); + bool eliminateRedundantTOCSaves(std::list<MachineInstr *> TOCSaves); // Find the "true" register represented by SrcReg (following chains // of copies and subreg_to_reg operations). @@ -180,6 +181,7 @@ bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; MachineInstr* ToErase = nullptr; + std::list<MachineInstr *> TOCSaves; for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { @@ -201,6 +203,24 @@ default: break; + case PPC::STD: { + // Store all TOC save instructions, std r2,24(r1), into TOCSaves list + // for function eliminateRedundantTOCSaves. This function will then + // remove any TOC saves which are redundant due to having a dominating + // save. + if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg()) + break; + unsigned StackOffset = MI.getOperand(1).getImm(); + unsigned StackReg = MI.getOperand(2).getReg(); + if (StackReg == PPC::X1 && StackOffset == 24) { + MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.hasVarSizedObjects() || + !MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) + break; + TOCSaves.push_back(&MI); + } + break; + } case PPC::XXPERMDI: { // Perform simplifications of 2x64 vector swaps and splats. 
// A swap is identified by an immediate value of 2, and a splat @@ -683,6 +703,7 @@ } } + Simplified |= eliminateRedundantTOCSaves(TOCSaves); // We try to eliminate redundant compare instruction. Simplified |= eliminateRedundantCompare(); @@ -884,6 +905,30 @@ return false; } +// This function will iterate over the input list containing TOC save +// instructions found in the Machine Function and remove the ones which are +// dominated by another TOC Save instruction. +bool PPCMIPeephole::eliminateRedundantTOCSaves( + std::list<MachineInstr *> TOCSaves) { + bool Simplified = false; + for (auto Iter = TOCSaves.begin(); Iter != TOCSaves.end(); Iter++) { + MachineInstr *CurrSave = *Iter; + auto Iter2 = TOCSaves.begin(); + while (Iter2 != TOCSaves.end()) { + if ((Iter != Iter2) && (MDT->dominates(CurrSave, *Iter2))) { + (*Iter2)->eraseFromParent(); + Iter2 = TOCSaves.erase(Iter2); + Simplified = true; + RemoveTOCSave++; + } else { + Iter2++; + } + } + } + + return Simplified; +} + // If multiple conditional branches are executed based on the (essentially) // same comparison, we merge compare instructions into one and make multiple // conditional branches on this comparison. 
Index: test/CodeGen/PowerPC/remove-redundant-toc-saves.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/remove-redundant-toc-saves.ll @@ -0,0 +1,120 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +define signext i32 @test1(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +entry: +; CHECK-LABEL: test1: +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) + %call = tail call signext i32 %Func(i32 signext %i) + %call1 = tail call signext i32 %Func2(i32 signext %i) + %add2 = add nsw i32 %call1, %call + ret i32 %add2 +} + +define signext i32 @test2(i32 signext %i, i32 signext %j, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +entry: +; CHECK-LABEL: test2: +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) + %call = tail call signext i32 %Func(i32 signext %i) + %tobool = icmp eq i32 %j, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %call1 = tail call signext i32 %Func(i32 signext %i) + %add2 = add nsw i32 %call1, %call + %call3 = tail call signext i32 %Func2(i32 signext %i) + %add4 = add nsw i32 %add2, %call3 + br label %if.end + +if.end: ; preds = %entry, %if.then + %Sum.0 = phi i32 [ %add4, %if.then ], [ %call, %entry ] + %call5 = tail call signext i32 %Func(i32 signext %i) + %add6 = add nsw i32 %call5, %Sum.0 + ret i32 %add6 +} + +define signext i32 @test3(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +; CHECK-LABEL: test3: +; CHECK: std 2, 24(1) +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) +entry: + %tobool = icmp eq i32 %i, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %call = tail call signext i32 %Func(i32 signext %i) + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call signext i32 %Func2(i32 signext 0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %Sum.0 = phi i32 [ %call, %if.then ], [ 
%call1, %if.else ] + %call3 = tail call signext i32 %Func(i32 signext %i) + %add4 = add nsw i32 %call3, %Sum.0 + ret i32 %add4 +} + +define signext i32 @test4(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +; CHECK-LABEL: test4: +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) + +entry: + %call = tail call signext i32 %Func(i32 signext %i) + %tobool = icmp eq i32 %i, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %call1 = tail call signext i32 %Func(i32 signext %i) + br label %if.end + +if.else: ; preds = %entry + %call3 = tail call signext i32 %Func2(i32 signext 0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %call1.pn = phi i32 [ %call1, %if.then ], [ %call3, %if.else ] + %Sum.0 = add nsw i32 %call1.pn, %call + ret i32 %Sum.0 +} + +define signext i32 @test5(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture readnone %Func2) { +entry: +; CHECK-LABEL: test5: +; CHECK: std 2, 24(1) +; CHECK: std 2, 24(1) + + %tobool = icmp eq i32 %i, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call signext i32 %Func(i32 signext %i) + br label %if.end + +if.end: ; preds = %entry, %if.then + %Sum.0 = phi i32 [ %call, %if.then ], [ 0, %entry ] + %call1 = tail call signext i32 %Func(i32 signext %i) + %add2 = add nsw i32 %call1, %Sum.0 + ret i32 %add2 +} + +define signext i32 @test6(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +entry: +; CHECK-LABEL: test6: +; CHECK: std 2, 24(1) +; CHECK: std 2, 24(1) + + %conv = sext i32 %i to i64 + %0 = alloca i8, i64 %conv, align 16 + %1 = bitcast i8* %0 to i32* + %call = tail call signext i32 %Func(i32 signext %i) + call void @useAlloca(i32* nonnull %1, i32 signext %call) + %call1 = call signext i32 %Func2(i32 signext %i) + %add2 = add nsw i32 %call1, %call + ret i32 %add2 +} + +declare void @useAlloca(i32*, i32 signext)