Index: lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.h
+++ lib/Target/PowerPC/PPCInstrInfo.h
@@ -172,6 +172,20 @@
                   MachineBasicBlock::iterator MI) const override;
 
 
+
+  /// Get the base register and byte offset of an instruction that reads/writes
+  /// memory. Returns false if they cannot be determined.
+  bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
+                             int64_t &Offset,
+                             const TargetRegisterInfo *TRI) const override;
+
+  /// Returns true if the two given memory operations should be scheduled
+  /// adjacent. Note that the LoadCluster/StoreCluster DAG mutations have to be
+  /// added in TargetPassConfig::createMachineScheduler() to have an effect.
+  bool shouldClusterMemOps(MachineInstr &FirstLdSt,
+                           MachineInstr &SecondLdSt,
+                           unsigned NumLoads) const override;
+
   // Branch analysis.
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
Index: lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.cpp
+++ lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1930,3 +1930,114 @@
     return &PPC::VSRCRegClass;
   return RC;
 }
+
+/// If \p MI is a load-immediate (LI/LI8) with an immediate operand, store that
+/// immediate in \p Offset and return true. Otherwise return false and leave
+/// \p Offset untouched.
+static bool tryExtractImm(const MachineInstr *MI, int64_t &Offset) {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case PPC::LI8:
+  case PPC::LI:
+    // Bail out rather than assert if operand 1 is not a plain immediate, so
+    // that such an instruction is simply not used for clustering.
+    if (!MI->getOperand(1).isImm())
+      return false;
+    Offset = MI->getOperand(1).getImm();
+    return true;
+  }
+}
+
+bool PPCInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
+                                         int64_t &Offset,
+                                         const TargetRegisterInfo *TRI) const {
+  const MCInstrDesc &InstrDesc = get(MemOp.getOpcode());
+
+  // Only plain loads and plain stores are handled. mayLoad and mayStore both
+  // set means it's a MachineInstr with side-effects rather than an actual
+  // memory operation; neither set means it does not access memory at all.
+  if (InstrDesc.mayLoad() == InstrDesc.mayStore())
+    return false;
+
+  // The code below takes operands 1 and 2 as the address, i.e. it assumes a
+  // load has exactly one explicit def (operand 0) and a store has none. With
+  // any other number of defs the address operands sit elsewhere, so bail out.
+  // NOTE(review): intended to reject update-form loads/stores, which also
+  // define the updated base register - confirm against the .td definitions.
+  if (InstrDesc.getNumDefs() != (InstrDesc.mayLoad() ? 1u : 0u))
+    return false;
+
+  // Expect at least 3 (explicit) operands.
+  if (MemOp.getNumExplicitOperands() < 3)
+    return false;
+
+  const MachineOperand &Mo1 = MemOp.getOperand(1);
+  const MachineOperand &Mo2 = MemOp.getOperand(2);
+
+  // Check for 'BaseAddr + imm' and 'imm + BaseAddr'.
+  if (Mo1.isReg() && Mo2.isImm()) {
+    BaseReg = Mo1.getReg();
+    Offset = Mo2.getImm();
+    return true;
+  }
+  if (Mo1.isImm() && Mo2.isReg()) {
+    BaseReg = Mo2.getReg();
+    Offset = Mo1.getImm();
+    return true;
+  }
+
+  // Everything below handles the 'reg + reg' form only.
+  if (!Mo1.isReg() || !Mo2.isReg())
+    return false;
+
+  // Check for 'r0 + BaseAddr'.
+  if (Mo1.getReg() == PPC::ZERO8 || Mo1.getReg() == PPC::ZERO) {
+    BaseReg = Mo2.getReg();
+    Offset = 0;
+    return true;
+  }
+
+  // Otherwise take the first register as the base and try to extract an
+  // immediate offset from the definition of the second register.
+  // TODO: Do physical registers need to be handled at this point?
+  if (!TargetRegisterInfo::isVirtualRegister(Mo2.getReg()))
+    return false;
+
+  const MachineRegisterInfo &MRI =
+      MemOp.getParent()->getParent()->getRegInfo();
+
+  // May have more than one def, in which case no single value can be read.
+  if (!MRI.hasOneDef(Mo2.getReg()))
+    return false;
+
+  BaseReg = Mo1.getReg();
+  return tryExtractImm(MRI.getVRegDef(Mo2.getReg()), Offset);
+}
+
+bool PPCInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
+                                       MachineInstr &SecondLdSt,
+                                       unsigned NumLoads) const {
+  // Ordered memory references are never clustered.
+  if (FirstLdSt.hasOrderedMemoryRef() || SecondLdSt.hasOrderedMemoryRef())
+    return false;
+
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  unsigned FirstBaseReg = 0, SecondBaseReg = 0;
+  int64_t FirstOffset = 0, SecondOffset = 0;
+
+  if (!getMemOpBaseRegImmOfs(FirstLdSt, FirstBaseReg, FirstOffset, TRI) ||
+      !getMemOpBaseRegImmOfs(SecondLdSt, SecondBaseReg, SecondOffset, TRI))
+    return false;
+
+  // Only cluster accesses that use the same base register ...
+  if (FirstBaseReg != SecondBaseReg)
+    return false;
+
+  // ... and whose offsets are at most MaxOffsetDistance apart.
+  const int64_t MaxOffsetDistance = 128;
+  const int64_t OffsetDiff = FirstOffset > SecondOffset
+                                 ? FirstOffset - SecondOffset
+                                 : SecondOffset - FirstOffset;
+  return OffsetDiff <= MaxOffsetDistance;
+}
Index: lib/Target/PowerPC/PPCTargetMachine.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetMachine.cpp
+++ lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
@@ -303,6 +304,17 @@
     return getTM();
   }
 
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    // TODO: Create the default for non-P8 Targets.
+    // ScheduleDAGInstrs *DAG = TargetPassConfig::createMachineScheduler(C);
+    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+
+    return DAG;
+  }
+
   void addIRPasses() override;
   bool addPreISel() override;
   bool addILPOpts() override;
Index: test/CodeGen/PowerPC/cluster.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/cluster.ll
@@ -0,0 +1,74 @@
+; RUN: llc -verify-machineinstrs -mattr=-altivec < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @foo(i64* noalias nocapture readonly %src, i64* noalias nocapture %dst) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: @foo
+; CHECK: ld [[REG1:[0-9]+]], 0([[BASE1:[0-9]+]])
+; CHECK-NEXT: ld [[REG2:[0-9]+]], 8([[BASE1]])
+; CHECK-NEXT: ld [[REG3:[0-9]+]], 16([[BASE1]])
+; CHECK-NEXT: std [[REG1]], 0([[BASE2:[0-9]+]])
+; CHECK-NEXT: std [[REG2]], 8([[BASE2]])
+; CHECK-NEXT: std [[REG3]], 16([[BASE2]])
+  %0 = load i64, i64* %src, align 8, !tbaa !2
+  store i64 %0, i64* %dst, align 8, !tbaa !2
+  %arrayidx2 = getelementptr inbounds i64, i64* %src, i64 1
+  %1 = load i64, i64* %arrayidx2, align 8, !tbaa !2
+  %arrayidx3 = getelementptr inbounds i64, i64* %dst, i64 1
+  store i64 %1, i64* %arrayidx3, align 8, !tbaa !2
+  %arrayidx4 = getelementptr inbounds i64, i64* %src, i64 2
+  %2 = load i64, i64* %arrayidx4, align 8, !tbaa !2
+  %arrayidx5 = getelementptr inbounds i64, i64* %dst, i64 2
+  store i64 %2, i64* %arrayidx5, align 8, !tbaa !2
+  ret void
+}
+
+define void @foo2(i64* noalias nocapture readonly %src, i64* noalias nocapture %dst) local_unnamed_addr #0 {
+entry:
+; PWR9-LABEL: @foo2
+; PWR9: li [[OFF1:[0-9]+]], 16
+; PWR9-NEXT: li [[OFF2:[0-9]+]], 32
+; PWR9-NEXT: li [[OFF3:[0-9]+]], 48
+; PWR9-NEXT: lxvx [[REG:[0-9]+]], 0, [[BASE1:[0-9]+]]
+; PWR9-NEXT: stxvx [[REG]], 0, [[BASE2:[0-9]+]]
+; PWR9-DAG: lxvx [[REG1:[0-9]+]], [[BASE1]], [[OFF1]]
+; PWR9-NEXT: lxvx [[REG2:[0-9]+]], [[BASE1]], [[OFF2]]
+; PWR9-NEXT: lxvx [[REG3:[0-9]+]], [[BASE1]], [[OFF3]]
+; PWR9-NEXT: stxvx [[REG1]], [[BASE2]], [[OFF1]]
+; PWR9-NEXT: stxvx [[REG2]], [[BASE2]], [[OFF2]]
+; PWR9-NEXT: stxvx [[REG3]], [[BASE2]], [[OFF3]]
+
+  %0 = bitcast i64* %src to <2 x i64>*
+  %1 = load <2 x i64>, <2 x i64>* %0, align 8, !tbaa !2
+  %2 = bitcast i64* %dst to <2 x i64>*
+  store <2 x i64> %1, <2 x i64>* %2, align 8, !tbaa !2
+  %arrayidx4 = getelementptr inbounds i64, i64* %src, i64 2
+  %arrayidx5 = getelementptr inbounds i64, i64* %dst, i64 2
+  %3 = bitcast i64* %arrayidx4 to <2 x i64>*
+  %4 = load <2 x i64>, <2 x i64>* %3, align 8, !tbaa !2
+  %5 = bitcast i64* %arrayidx5 to <2 x i64>*
+  store <2 x i64> %4, <2 x i64>* %5, align 8, !tbaa !2
+  %arrayidx8 = getelementptr inbounds i64, i64* %src, i64 4
+  %arrayidx9 = getelementptr inbounds i64, i64* %dst, i64 4
+  %6 = bitcast i64* %arrayidx8 to <2 x i64>*
+  %7 = load <2 x i64>, <2 x i64>* %6, align 8, !tbaa !2
+  %8 = bitcast i64* %arrayidx9 to <2 x i64>*
+  store <2 x i64> %7, <2 x i64>* %8, align 8, !tbaa !2
+  %arrayidx12 = getelementptr inbounds i64, i64* %src, i64 6
+  %arrayidx13 = getelementptr inbounds i64, i64* %dst, i64 6
+  %9 = bitcast i64* %arrayidx12 to <2 x i64>*
+  %10 = load <2 x i64>, <2 x i64>* %9, align 8, !tbaa !2
+  %11 = bitcast i64* %arrayidx13 to <2 x i64>*
+  store <2 x i64> %10, <2 x i64>* %11, align 8, !tbaa !2
+  ret void
+}
+
+attributes #0 = { norecurse nounwind }
+
+!2 = !{!3, !3, i64 0}
+!3 = !{!"long long", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
Index: test/CodeGen/PowerPC/ppcf128-endian.ll
===================================================================
--- test/CodeGen/PowerPC/ppcf128-endian.ll
+++ test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -27,8 +27,8 @@
 }
 ; CHECK: @caller
 ; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK: lfd 2, 8([[REG]])
 ; CHECK: lfd 1, 0([[REG]])
+; CHECK: lfd 2, 8([[REG]])
 ; CHECK: bl test
 
 declare void @test(ppc_fp128)