Index: lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.h
+++ lib/Target/PowerPC/PPCInstrInfo.h
@@ -159,6 +159,23 @@
     return get(Opcode).TSFlags & PPCII::XFormMemOp;
   }
 
+  /// Does instruction \p MI implicitly add its immediate operand to its
+  /// register operand? If so, return the pair of operand indices that are
+  /// implicitly added together (as <imm, reg>). Otherwise, return a pair
+  /// where both values are indices larger than number of operands for MI.
+  /// If the immediate must be a multiple of a certain value, the output
+  /// parameter \p ImmIsMultipleOf is set to that value (and to 1 otherwise).
+  std::pair<unsigned, unsigned>
+  instrImplicitlyAddsImm(MachineInstr &MI,
+                         unsigned &ImmIsMultipleOf) const;
+
+  /// If \p MI adds an immediate to a base register (see
+  /// instrImplicitlyAddsImm) and that register is defined by an addi of a
+  /// virtual register and an immediate, fold the addi's immediate into the
+  /// immediate of \p MI and use the addi's source register as the base.
+  /// \returns true if \p MI was modified.
+  bool convertDFormFedByAddi(MachineInstr &MI) const;
+
   ScheduleHazardRecognizer *
   CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                const ScheduleDAG *DAG) const override;
Index: lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.cpp
+++ lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3347,3 +3347,109 @@
   }
   return false;
 }
+
+std::pair<unsigned, unsigned>
+PPCInstrInfo::instrImplicitlyAddsImm(MachineInstr &MI,
+                                     unsigned &ImmIsMultipleOf) const {
+  // An index past the last operand of MI signals "no such operand pair".
+  unsigned FailVal = MI.getNumOperands() + 1;
+  ImmIsMultipleOf = 1;
+  switch (MI.getOpcode()) {
+  default: return std::make_pair(FailVal, FailVal);
+  // The displacement of these must be a multiple of 4.
+  case PPC::LD:
+  case PPC::STD:
+  case PPC::LWA:
+  case PPC::LXSD:
+  case PPC::LXSSP:
+  case PPC::STXSD:
+  case PPC::STXSSP:
+    ImmIsMultipleOf = 4;
+    return std::make_pair(1U, 2U);
+  // The displacement of these must be a multiple of 16.
+  case PPC::LXV:
+  case PPC::STXV:
+    ImmIsMultipleOf = 16;
+    return std::make_pair(1U, 2U);
+  // No alignment requirement on the displacement.
+  case PPC::LBZ:
+  case PPC::LHZ:
+  case PPC::LHA:
+  case PPC::LWZ:
+  case PPC::STB:
+  case PPC::STH:
+  case PPC::STW:
+  case PPC::LFS:
+  case PPC::LFD:
+  case PPC::STFS:
+  case PPC::STFD:
+    return std::make_pair(1U, 2U);
+  }
+}
+
+bool PPCInstrInfo::convertDFormFedByAddi(MachineInstr &MI) const {
+  unsigned Align = 1;
+
+  // If the instruction takes an immediate and a register that are implicitly
+  // added and the respective operands actually are an immediate and a reg, we
+  // might be able to transform it.
+  std::pair<unsigned, unsigned> ImmAndReg = instrImplicitlyAddsImm(MI, Align);
+  if (ImmAndReg.first >= MI.getNumOperands() ||
+      ImmAndReg.second >= MI.getNumOperands() ||
+      !MI.getOperand(ImmAndReg.first).isImm() ||
+      !MI.getOperand(ImmAndReg.second).isReg())
+    return false;
+
+  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  unsigned Reg = MI.getOperand(ImmAndReg.second).getReg();
+
+  // The input register must come from an addi whose operands are a register and
+  // an immediate (no frame indices, etc.). Only a virtual register has a
+  // definition we can look up, and that lookup may still come back empty.
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return false;
+  unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
+  if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+    return false;
+  MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
+  if (!DefMI ||
+      (DefMI->getOpcode() != PPC::ADDI && DefMI->getOpcode() != PPC::ADDI8) ||
+      !DefMI->getOperand(1).isReg() || !DefMI->getOperand(2).isImm())
+    return false;
+
+  // Do not forward a physical register: it may be redefined between the addi
+  // and MI, which would change the address MI accesses.
+  unsigned NewBaseReg = DefMI->getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(NewBaseReg))
+    return false;
+
+  // If this instruction has specific alignment requirements for the disp.
+  // immediate, we have to make sure we respect it.
+  int64_t NewImm =
+      DefMI->getOperand(2).getImm() + MI.getOperand(ImmAndReg.first).getImm();
+  if (NewImm % Align)
+    return false;
+  if (!isInt<16>(NewImm))
+    return false;
+
+  // The copy-like instructions looked through above may change the register
+  // class, so make sure the new base is legal for MI's base register operand.
+  const TargetRegisterClass *BaseRC = getRegClass(
+      MI.getDesc(), ImmAndReg.second, TRI, *MI.getParent()->getParent());
+  if (BaseRC && !MRI->constrainRegClass(NewBaseReg, BaseRC))
+    return false;
+
+  DEBUG(dbgs() << "Converting: ");
+  DEBUG(MI.dump());
+  DEBUG(dbgs() << "Fed by: ");
+  DEBUG(DefMI->dump());
+  MI.getOperand(ImmAndReg.first).setImm(NewImm);
+  MI.getOperand(ImmAndReg.second).setReg(NewBaseReg);
+  // NewBaseReg is now used by MI, so a kill flag left on the addi (or on MI's
+  // own operand, inherited from the old base) would be stale.
+  MRI->clearKillFlags(NewBaseReg);
+  DEBUG(dbgs() << "Into: ");
+  DEBUG(MI.dump());
+  return true;
+}
Index: lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- lib/Target/PowerPC/PPCMIPeephole.cpp
+++ lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -48,6 +48,9 @@
 STATISTIC(NumFixedPointIterations,
           "Number of fixed-point iterations converting reg-reg instructions "
           "to reg-imm ones");
+STATISTIC(NumDFormsSrcForwarded,
+          "Number of forwarded source registers for D-Form instructions fed "
+          "by addi");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -264,6 +267,12 @@
       if (MI.isDebugValue())
         continue;
 
+      if (TII->convertDFormFedByAddi(MI)) {
+        NumDFormsSrcForwarded++;
+        Simplified = true;
+        continue;
+      }
+
       // Per-opcode peepholes.
       switch (MI.getOpcode()) {
 
Index: test/CodeGen/PowerPC/forward-addi-src-to-D-form.ll
===================================================================
--- test/CodeGen/PowerPC/forward-addi-src-to-D-form.ll
+++ test/CodeGen/PowerPC/forward-addi-src-to-D-form.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; Regression test for the MI peephole that forwards the source register of an
+; addi into the D-Form memory instruction it feeds. It runs with
+; -verify-machineinstrs so that an invalid rewrite is reported by the verifier.
+%"struct1" = type { i32, i32, %union1, %"struct3" }
+%union1 = type { %struct2 }
+%struct2 = type { i32 }
+%"struct3" = type { %"struct4" }
+%"struct4" = type { %"class1" }
+%"class1" = type { %"class2" }
+%"class2" = type { %"class3" }
+%"class3" = type { %"struct5", %"struct7" }
+%"struct5" = type { %"struct6" }
+%"struct6" = type { i8 }
+%"struct7" = type { %"struct8", i64 }
+%"struct8" = type { i32, %"struct8"*, %"struct8"*, %"struct8"* }
+
+$test = comdat any
+
+; Function Attrs: nounwind
+define void @test() {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %for.body.i.i.i.i.prol
+; CHECK-NEXT:    bc 4, 20, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if.else.i.i.i.i.i.i.i.i.i.i.i.i.prol
+; CHECK-NEXT:    ld 3, 0(3)
+; CHECK-NEXT:    li 5, 0
+; CHECK-NEXT:    neg 3, 3
+; CHECK-NEXT:    rldicr 3, 3, 0, 57
+; CHECK-NEXT:    addi 4, 3, 152
+; CHECK-NEXT:    std 5, 160(3)
+; CHECK-NEXT:    std 4, 0(3)
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_2: # %if.then.i.i.i.i.i.i.i.i.i.i.i.i.prol
+entry:
+  %0 = load %"struct1"*, %"struct1"** undef, align 8
+  %1 = ptrtoint %"struct1"* %0 to i64
+  %2 = sub i64 0, %1
+  %3 = lshr i64 %2, 6
+  %4 = add nuw nsw i64 %3, 1
+  %scevgep16.i.i.i.i80 = getelementptr %"struct1", %"struct1"* null, i64 %4
+  br label %for.body.i.i.i.i.prol
+
+for.body.i.i.i.i.prol: ; preds = %entry
+  %5 = getelementptr inbounds %"struct1", %"struct1"* %scevgep16.i.i.i.i80, i64 1, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %6 = getelementptr inbounds i8, i8* %5, i64 8
+  br i1 undef, label %if.else.i.i.i.i.i.i.i.i.i.i.i.i.prol, label %if.then.i.i.i.i.i.i.i.i.i.i.i.i.prol
+
+if.then.i.i.i.i.i.i.i.i.i.i.i.i.prol: ; preds = %for.body.i.i.i.i.prol
+  unreachable
+
+if.else.i.i.i.i.i.i.i.i.i.i.i.i.prol: ; preds = %for.body.i.i.i.i.prol
+  %_M_parent.i5.i.i.i.i.i.i.i.i.i.i.i.i.prol = getelementptr inbounds i8, i8* %5, i64 16
+  %7 = bitcast i8* %_M_parent.i5.i.i.i.i.i.i.i.i.i.i.i.i.prol to %"struct8"**
+  store %"struct8"* null, %"struct8"** %7, align 8
+  store i8* %6, i8** undef, align 8
+  ret void
+}