Index: llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -57,6 +57,117 @@
           MachineFunctionProperties::Property::NoVRegs);
     }
 
+    // Remove load immediates that reload a value the register already holds.
+    //
+    // The outer loop takes each LI/LI8/LIS/LIS8 with an immediate operand
+    // (BBI) as a reference. The inner loop walks the instructions after BBI
+    // in the same block (AfterBBI): a later load with the same opcode,
+    // register and immediate is redundant and is queued for erasure, while
+    // any other instruction that modifies the register ends the scan.
+    //
+    // DeadOrKillToUnset points at the most recent operand seen with a
+    // dead/kill flag for the register: the def of BBI, a killing use in an
+    // AfterBBI, or the def of a redundant AfterBBI. The flag is cleared when
+    // a later redundant load is removed, because the register then stays
+    // live past that operand. It is null when no such operand has been seen.
+    //
+    // Returns true if at least one instruction was erased from MBB.
+    bool removeRedundantLIs(MachineBasicBlock &MBB,
+                            const TargetRegisterInfo *TRI) {
+      LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
+                 MBB.dump(); dbgs() << "\n");
+
+      DenseSet<MachineInstr *> InstrsToErase;
+      for (auto BBI = MBB.instr_begin(), BBE = MBB.instr_end(); BBI != BBE;
+           ++BBI) {
+        // A load immediate already queued for erasure cannot be the
+        // reference for removing later instructions.
+        if (InstrsToErase.count(&*BBI))
+          continue;
+        // Only LI, LI8, LIS and LIS8 are candidates.
+        unsigned Opc = BBI->getOpcode();
+        if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
+            Opc != PPC::LIS8)
+          continue;
+        // Skip a load immediate whose operand is a relocation (e.g., $r3 =
+        // LI target-flags(ppc-lo) %const.0).
+        if (!BBI->getOperand(1).isImm())
+          continue;
+        assert(BBI->getOperand(0).isReg() &&
+               "Expected a register for the first operand");
+
+        LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
+
+        unsigned Reg = BBI->getOperand(0).getReg();
+        int64_t Imm = BBI->getOperand(1).getImm();
+        MachineOperand *DeadOrKillToUnset = nullptr;
+        if (BBI->getOperand(0).isDead()) {
+          DeadOrKillToUnset = &BBI->getOperand(0);
+          LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
+                            << " from load immediate " << *BBI
+                            << " is a unsetting candidate\n");
+        }
+        // Scan the instructions after BBI for a redundant load immediate.
+        for (auto AfterBBI = std::next(BBI); AfterBBI != BBE; ++AfterBBI) {
+          // Remember the operand that kills Reg; its kill flag is cleared if
+          // a redundant load immediate follows.
+          int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
+          if (KillIdx != -1) {
+            assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
+            DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
+            LLVM_DEBUG(dbgs()
+                       << " Kill flag of " << *DeadOrKillToUnset << " from "
+                       << *AfterBBI << " is a unsetting candidate\n");
+          }
+
+          if (!AfterBBI->modifiesRegister(Reg, TRI))
+            continue;
+          // Stop scanning: Reg is overwritten by a different opcode.
+          if (AfterBBI->getOpcode() != Opc)
+            break;
+          assert(AfterBBI->getOperand(0).isReg() &&
+                 "Expected a register for the first operand");
+          // Stop scanning: Reg is overwritten by a relocation or by a
+          // different value.
+          if (!AfterBBI->getOperand(1).isImm() ||
+              AfterBBI->getOperand(1).getImm() != Imm)
+            break;
+
+          // AfterBBI loads the same immediate into the same Reg, so it is
+          // redundant. Clear the pending dead/kill flag, if there is one, to
+          // extend the live range of Reg across AfterBBI before removing it.
+          // No flagged operand may have been seen (a use of Reg need not
+          // carry a kill flag), so guard instead of asserting.
+          if (DeadOrKillToUnset) {
+            LLVM_DEBUG(dbgs()
+                       << " Unset dead/kill flag of " << *DeadOrKillToUnset
+                       << " from " << *DeadOrKillToUnset->getParent());
+            if (DeadOrKillToUnset->isDef())
+              DeadOrKillToUnset->setIsDead(false);
+            else
+              DeadOrKillToUnset->setIsKill(false);
+          }
+          // A dead flag on the def of the removed load becomes the next
+          // candidate to clear.
+          DeadOrKillToUnset =
+              AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
+          if (DeadOrKillToUnset) {
+            LLVM_DEBUG(dbgs()
+                       << " Dead flag of " << *DeadOrKillToUnset << " from "
+                       << *AfterBBI << " is a unsetting candidate\n");
+          }
+          InstrsToErase.insert(&*AfterBBI);
+          LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
+                     AfterBBI->dump());
+        }
+      }
+
+      for (MachineInstr *MI : InstrsToErase)
+        MI->eraseFromParent();
+      NumRemovedInPreEmit += InstrsToErase.size();
+      return !InstrsToErase.empty();
+    }
+
     bool runOnMachineFunction(MachineFunction &MF) override {
       if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
         return false;
@@ -65,6 +176,7 @@
       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
       SmallVector InstrsToErase;
       for (MachineBasicBlock &MBB : MF) {
+        Changed |= removeRedundantLIs(MBB, TRI);
         for (MachineInstr &MI : MBB) {
           unsigned Opc = MI.getOpcode();
           // Detect self copies - these can result from running AADB.
Index: llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -O3 < %s | FileCheck %s -check-prefix=PPC64LE
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+@global.6 = external global i32*
+
+declare void @barney.88(i1, i32*)
+declare void @barney.94(i8*, i32)
+
+define void @redundancy_on_ppc_only(i1 %arg7) {
+; PPC64LE-LABEL: redundancy_on_ppc_only:
+; PPC64LE: # %bb.0: # %bb
+; PPC64LE-NEXT: mflr 0
+; PPC64LE-NEXT: andi.
3, 3, 1 +; PPC64LE-NEXT: std 0, 16(1) +; PPC64LE-NEXT: stdu 1, -32(1) +; PPC64LE-NEXT: .cfi_def_cfa_offset 32 +; PPC64LE-NEXT: .cfi_offset lr, 16 +; PPC64LE-NEXT: li 3, 1 +; PPC64LE-NEXT: li 4, 0 +; PPC64LE-NEXT: isel 3, 3, 4, 1 +; PPC64LE-NEXT: bl barney.88 +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: addi 1, 1, 32 +; PPC64LE-NEXT: ld 0, 16(1) +; PPC64LE-NEXT: mtlr 0 +; PPC64LE-NEXT: blr +bb: + br label %bb10 + +bb10: ; preds = %bb + call void @barney.88(i1 %arg7, i32* null) + ret void +} + +define void @redundancy_on_ppc_and_other_targets() { +; PPC64LE-LABEL: redundancy_on_ppc_and_other_targets: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: mflr 0 +; PPC64LE-NEXT: std 0, 16(1) +; PPC64LE-NEXT: stdu 1, -32(1) +; PPC64LE-NEXT: .cfi_def_cfa_offset 32 +; PPC64LE-NEXT: .cfi_offset lr, 16 +; PPC64LE-NEXT: addis 3, 2, .LC0@toc@ha +; PPC64LE-NEXT: ld 3, .LC0@toc@l(3) +; PPC64LE-NEXT: li 4, 0 +; PPC64LE-NEXT: std 4, 0(3) +; PPC64LE-NEXT: bl barney.94 +; PPC64LE-NEXT: nop + store i32* null, i32** @global.6 + call void @barney.94(i8* undef, i32 0) + unreachable +} Index: llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.mir @@ -0,0 +1,348 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -run-pass ppc-pre-emit-peephole %s -o - | FileCheck %s + +--- +name: t1 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t1 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: STD renamable $x3, 16, $x1 + ; CHECK: STD killed renamable $x3, 8, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + STD killed renamable $x3, 16, $x1 + renamable $x3 = LI8 0 + STD killed renamable $x3, 8, $x1 + BLR8 implicit $lr8, implicit $rm + +... 
+--- +name: t2 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t2 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: STD renamable $x3, 32, $x1 + ; CHECK: STD renamable $x3, 24, $x1 + ; CHECK: STD renamable $x3, 16, $x1 + ; CHECK: STD killed renamable $x3, 8, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + STD killed renamable $x3, 32, $x1 + renamable $x3 = LI8 0 + STD killed renamable $x3, 24, $x1 + renamable $x3 = LI8 0 + STD killed renamable $x3, 16, $x1 + renamable $x3 = LI8 0 + STD killed renamable $x3, 8, $x1 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: t3 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t3 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: STD renamable $x3, 32, $x1 + ; CHECK: STD renamable $x3, 24, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + STD killed renamable $x3, 32, $x1 + renamable $x3 = LI8 0 + STD renamable $x3, 24, $x1 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: t4 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t4 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: STD renamable $x3, 16, $x1 + ; CHECK: renamable $x4 = ADDI8 renamable $x3, 8 + ; CHECK: STD killed renamable $x3, 8, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + STD killed renamable $x3, 16, $x1 + renamable $x3 = LI8 0 + renamable $x4 = ADDI8 killed renamable $x3, 8 + renamable $x3 = LI8 0 + STD killed renamable $x3, 8, $x1 + BLR8 implicit $lr8, implicit $rm + +... 
+--- +name: t5 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t5 + ; CHECK: liveins: $x1 + ; CHECK: renamable $r3 = LI 0 + ; CHECK: STW renamable $r3, 16, $x1 + ; CHECK: STW killed renamable $r3, 12, $x1 + ; CHECK: renamable $r3 = LI 1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $r3 = LI 0 + STW killed renamable $r3, 16, $x1 + renamable $r3 = LI 0 + STW killed renamable $r3, 12, $x1 + renamable $r3 = LI 1 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: t6 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t6 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: renamable $x4 = LI8 1 + ; CHECK: STD renamable $x3, 32, $x1 + ; CHECK: STD renamable $x4, 24, $x1 + ; CHECK: STD killed renamable $x3, 16, $x1 + ; CHECK: STD killed renamable $x4, 8, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + renamable $x4 = LI8 1 + STD killed renamable $x3, 32, $x1 + STD killed renamable $x4, 24, $x1 + renamable $x3 = LI8 0 + renamable $x4 = LI8 1 + STD killed renamable $x3, 16, $x1 + STD killed renamable $x4, 8, $x1 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: t7 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1, $x4 + + ; CHECK-LABEL: name: t7 + ; CHECK: liveins: $x1, $x4 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: STD killed renamable $x3, 32, $x1 + ; CHECK: renamable $x3 = ADDI8 $x4, 6 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + STD killed renamable $x3, 32, $x1 + renamable $x3 = ADDI8 $x4, 6 + BLR8 implicit $lr8, implicit $rm + +... 
+--- +name: t8 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t8 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: STD renamable $x3, 32, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + STD killed renamable $x3, 32, $x1 + renamable $x3 = LI8 0 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: t9 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: t9 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $x3 + ; CHECK: renamable $r4 = LI 0, implicit-def $x4 + ; CHECK: renamable $x24 = RLDICL renamable $x4, 0, 32 + ; CHECK: renamable $cr0 = CMPLDI renamable $x3, 0 + ; CHECK: BCC 68, killed renamable $cr0, %bb.1 + ; CHECK: B %bb.2 + ; CHECK: bb.1: + ; CHECK: liveins: $r4, $x1 + ; CHECK: STW killed renamable $r4, 16, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + ; CHECK: bb.2: + ; CHECK: liveins: $r4, $x1 + ; CHECK: STW killed renamable $r4, 32, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + bb.0.entry: + liveins: $x3 + successors: %bb.8, %bb.7 + + renamable $r4 = LI 0, implicit-def $x4 + renamable $x24 = RLDICL killed renamable $x4, 0 , 32 + renamable $cr0 = CMPLDI renamable $x3, 0 + renamable $r4 = LI 0 + BCC 68, killed renamable $cr0, %bb.7 + B %bb.8 + + bb.7: + liveins: $r4, $x1 + STW killed renamable $r4, 16, $x1 + BLR8 implicit $lr8, implicit $rm + + bb.8: + liveins: $r4, $x1 + STW killed renamable $r4, 32, $x1 + BLR8 implicit $lr8, implicit $rm + +... 
+--- +name: t10 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: t10 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 24 + ; CHECK: STD killed renamable $x3, 16, $x1 + ; CHECK: renamable $r3 = LI 0 + ; CHECK: STW killed renamable $r3, 26, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 24 + STD killed renamable $x3, 16, $x1 + renamable $r3 = LI 0 + STW killed renamable $r3, 26, $x1 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: LIS8 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: LIS8 + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LIS8 0 + ; CHECK: STD renamable $x3, 16, $x1 + ; CHECK: STD killed renamable $x3, 8, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LIS8 0 + STD killed renamable $x3, 16, $x1 + renamable $x3 = LIS8 0 + STD killed renamable $x3, 8, $x1 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: LIS +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: LIS + ; CHECK: liveins: $x1 + ; CHECK: renamable $r3 = LIS 0 + ; CHECK: STW renamable $r3, 16, $x1 + ; CHECK: STW killed renamable $r3, 12, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $r3 = LIS 0 + STW killed renamable $r3, 16, $x1 + renamable $r3 = LIS 0 + STW killed renamable $r3, 12, $x1 + BLR8 implicit $lr8, implicit $rm + +... +--- +name: modify_and_kill_the_reg_in_the_same_inst +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + + ; CHECK-LABEL: name: modify_and_kill_the_reg_in_the_same_inst + ; CHECK: renamable $x6 = LI8 1 + ; CHECK: renamable $x6 = RLDICR killed renamable $x6, 44, 19 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x6 = LI8 1 + renamable $x6 = RLDICR killed renamable $x6, 44, 19 + BLR8 implicit $lr8, implicit $rm + +... 
+--- +name: dead_load_immediate_followed_by_a_redundancy +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: dead_load_immediate_followed_by_a_redundancy + ; CHECK: liveins: $x1 + ; CHECK: renamable $r3 = LI 128 + ; CHECK: renamable $x4 = ADDI8 $x1, -128 + ; CHECK: renamable $x5 = ADDI8 $x1, -128 + ; CHECK: STW killed renamable $r3, 16, $x4 + ; CHECK: BLR8 implicit $lr8, implicit $rm + dead renamable $r3 = LI 128 + renamable $x4 = ADDI8 $x1, -128 + dead renamable $r3 = LI 128 + renamable $x5 = ADDI8 $x1, -128 + renamable $r3 = LI 128 + STW killed renamable $r3, 16, $x4 + BLR8 implicit $lr8, implicit $rm + +...