Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -294,6 +294,8 @@ const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; static int getRecordFormOpcode(unsigned Opcode); + bool isTOCSaveMI(const MachineInstr &MI) const; + bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, const unsigned PhiDepth) const; Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2205,6 +2205,20 @@ return false; } +// This function returns true if the input MachineInstr is a TOC save +// instruction. +bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const { + if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg()) + return false; + unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); + unsigned StackOffset = MI.getOperand(1).getImm(); + unsigned StackReg = MI.getOperand(2).getReg(); + if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset) + return true; + + return false; +} + // We limit the max depth to track incoming values of PHIs or binary ops // (e.g. AND) to avoid exsessive cost. const unsigned MAX_DEPTH = 1; Index: lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- lib/Target/PowerPC/PPCMIPeephole.cpp +++ lib/Target/PowerPC/PPCMIPeephole.cpp @@ -29,13 +29,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" #include "MCTargetDesc/PPCPredicates.h" using namespace llvm; #define DEBUG_TYPE "ppc-mi-peepholes" +STATISTIC(RemoveTOCSave, "Number of TOC saves removed"); STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); @@ -78,7 +78,9 @@ // Perform peepholes. bool eliminateRedundantCompare(void); - + bool eliminateRedundantTOCSaves(std::map &TOCSaves); + void UpdateTOCSaves(std::map &TOCSaves, + MachineInstr *MI); // Find the "true" register represented by SrcReg (following chains // of copies and subreg_to_reg operations). unsigned lookThruCopyLike(unsigned SrcReg); @@ -176,10 +178,36 @@ return 0; } +// This function maintains a map for the pairs +// Each time a new TOC save is encountered, it checks if any of the exisiting +// ones are dominated by the new one. If so, it marks the exisiting one as +// redundant by setting it's entry in the map as false. It then adds the new +// instruction to the map with either true or false depending on if any +// exisiting instructions dominated the new one. +void PPCMIPeephole::UpdateTOCSaves( + std::map &TOCSaves, MachineInstr *MI) { + bool Keep = true; + for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) { + MachineInstr *CurrInst = It->first; + // If new instruction dominates an exisiting one, mark exisiting one as + // redundant. + if (It->second && MDT->dominates(MI, CurrInst)) + It->second = false; + // Check if the new instruction is redundant. + if (MDT->dominates(CurrInst, MI)) { + Keep = false; + break; + } + } + // Add new instruction to map. + TOCSaves.insert( std::pair (MI, Keep)); +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; MachineInstr* ToErase = nullptr; + std::map TOCSaves; for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { @@ -201,6 +229,18 @@ default: break; + case PPC::STD: { + MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.hasVarSizedObjects() || + !MF->getSubtarget().isELFv2ABI()) + break; + // When encountering a TOC save instruction, call UpdateTOCSaves + // to add it to the TOCSaves map and mark any exisiting TOC saves + // it dominates as redundant. + if (TII->isTOCSaveMI(MI)) + UpdateTOCSaves(TOCSaves, &MI); + break; + } case PPC::XXPERMDI: { // Perform simplifications of 2x64 vector swaps and splats. // A swap is identified by an immediate value of 2, and a splat @@ -683,6 +723,8 @@ } } + // Eliminate all the TOC save instructions which are redundant. + Simplified |= eliminateRedundantTOCSaves(TOCSaves); // We try to eliminate redundant compare instruction. Simplified |= eliminateRedundantCompare(); @@ -884,6 +926,24 @@ return false; } +// This function will iterate over the input map containing a pair of TOC save +// instruction and a flag. The flag will be set to false if the TOC save is proven +// redundant. This function will erase from the basic block all the TOC saves +// marked as redundant. +bool PPCMIPeephole::eliminateRedundantTOCSaves( + std::map &TOCSaves) { + bool Simplified = false; + for (auto TOCSave : TOCSaves) { + if (!TOCSave.second) { + TOCSave.first->eraseFromParent(); + RemoveTOCSave++; + Simplified = true; + } + } + + return Simplified; +} + // If multiple conditional branches are executed based on the (essentially) // same comparison, we merge compare instructions into one and make multiple // conditional branches on this comparison. Index: test/CodeGen/PowerPC/remove-redundant-toc-saves.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/remove-redundant-toc-saves.ll @@ -0,0 +1,123 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +define signext i32 @test1(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +entry: +; CHECK-LABEL: test1: +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) + %call = tail call signext i32 %Func(i32 signext %i) + %call1 = tail call signext i32 %Func2(i32 signext %i) + %add2 = add nsw i32 %call1, %call + ret i32 %add2 +} + +define signext i32 @test2(i32 signext %i, i32 signext %j, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +entry: +; CHECK-LABEL: test2: +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) + %call = tail call signext i32 %Func(i32 signext %i) + %tobool = icmp eq i32 %j, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %call1 = tail call signext i32 %Func(i32 signext %i) + %add2 = add nsw i32 %call1, %call + %call3 = tail call signext i32 %Func2(i32 signext %i) + %add4 = add nsw i32 %add2, %call3 + br label %if.end + +if.end: ; preds = %entry, %if.then + %Sum.0 = phi i32 [ %add4, %if.then ], [ %call, %entry ] + %call5 = tail call signext i32 %Func(i32 signext %i) + %add6 = add nsw i32 %call5, %Sum.0 + ret i32 %add6 +} + +; Check for multiple TOC saves with if then else where neither dominates the other. +define signext i32 @test3(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +; CHECK-LABEL: test3: +; CHECK: std 2, 24(1) +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) +entry: + %tobool = icmp eq i32 %i, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %call = tail call signext i32 %Func(i32 signext %i) + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call signext i32 %Func2(i32 signext 0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %Sum.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ] + %call3 = tail call signext i32 %Func(i32 signext %i) + %add4 = add nsw i32 %call3, %Sum.0 + ret i32 %add4 +} + +define signext i32 @test4(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +; CHECK-LABEL: test4: +; CHECK: std 2, 24(1) +; CHECK-NOT: std 2, 24(1) + +entry: + %call = tail call signext i32 %Func(i32 signext %i) + %tobool = icmp eq i32 %i, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %call1 = tail call signext i32 %Func(i32 signext %i) + br label %if.end + +if.else: ; preds = %entry + %call3 = tail call signext i32 %Func2(i32 signext 0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %call1.pn = phi i32 [ %call1, %if.then ], [ %call3, %if.else ] + %Sum.0 = add nsw i32 %call1.pn, %call + ret i32 %Sum.0 +} + +; Check for multiple TOC saves with if then where neither is redundant. +define signext i32 @test5(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture readnone %Func2) { +entry: +; CHECK-LABEL: test5: +; CHECK: std 2, 24(1) +; CHECK: std 2, 24(1) + + %tobool = icmp eq i32 %i, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call signext i32 %Func(i32 signext %i) + br label %if.end + +if.end: ; preds = %entry, %if.then + %Sum.0 = phi i32 [ %call, %if.then ], [ 0, %entry ] + %call1 = tail call signext i32 %Func(i32 signext %i) + %add2 = add nsw i32 %call1, %Sum.0 + ret i32 %add2 +} + +; Check for multiple TOC saves if there are dynamic allocations on the stack. +define signext i32 @test6(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +entry: +; CHECK-LABEL: test6: +; CHECK: std 2, 24(1) +; CHECK: std 2, 24(1) + + %conv = sext i32 %i to i64 + %0 = alloca i8, i64 %conv, align 16 + %1 = bitcast i8* %0 to i32* + %call = tail call signext i32 %Func(i32 signext %i) + call void @useAlloca(i32* nonnull %1, i32 signext %call) + %call1 = call signext i32 %Func2(i32 signext %i) + %add2 = add nsw i32 %call1, %call + ret i32 %add2 +} + +declare void @useAlloca(i32*, i32 signext)