Index: lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.h
+++ lib/Target/PowerPC/PPCInstrInfo.h
@@ -171,6 +171,26 @@
   void insertNoop(MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MI) const override;
 
+  /// This is used by the pre-regalloc scheduler to determine if two loads are
+  /// loading from the same base address. It should only return true if the base
+  /// pointers are the same and the only differences between the two addresses
+  /// are the offset. It also returns the offsets by reference.
+  bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                               int64_t &Offset1,
+                               int64_t &Offset2) const override;
+
+  /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+  /// be scheduled together. On some targets if two loads are loading from
+  /// addresses in the same cache line, it's better if they are scheduled
+  /// together. This function takes two integers that represent the load offsets
+  /// from the common base address. It returns true if it decides it's desirable
+  /// to schedule the two loads together. "NumLoads" is the number of loads that
+  /// have already been scheduled after Load1.
+  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                               int64_t Offset1, int64_t Offset2,
+                               unsigned NumLoads) const override;
+
 
   // Branch analysis.
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
Index: lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.cpp
+++ lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -40,6 +40,9 @@
 
 using namespace llvm;
 
+static cl::opt<unsigned> VectorLoadClusterCount("vec-load-clustering", cl::Hidden, cl::init(3));
+static cl::opt<unsigned> ScalarLoadClusterCount("scalar-load-clustering", cl::Hidden, cl::init(3));
+
 #define DEBUG_TYPE "ppc-instr-info"
 
 #define GET_INSTRMAP_INFO
@@ -1929,3 +1932,100 @@
     return &PPC::VSRCRegClass;
   return RC;
 }
+
+
+/// Return the first memory operand attached to \p Load, or nullptr when
+/// memoperands_begin() is null. A load is expected to carry exactly one.
+static MachineMemOperand *extractMemOp(MachineSDNode *Load) {
+  MachineMemOperand **IMemOp = Load->memoperands_begin();
+  if (!IMemOp)
+    return nullptr;
+
+  // Compare against a computed position rather than incrementing inside the
+  // assert, so debug and release builds execute the same statements.
+  assert(IMemOp + 1 == Load->memoperands_end() &&
+         "Expect a single memory operand in a load");
+  return *IMemOp;
+}
+
+// Some machine instructions may have both mayLoad and mayStore flags set.
+// These instructions are lowered from intrinsics that don't actually
+// touch memory, but cannot have the IntrNoMem flag because they need to be
+// inserted into a chain due to their side-effects. These side-effect only
+// intrinsics will have both mayLoad and mayStore flags set.
+static bool mayActuallyLoad(const MCInstrDesc &Desc) {
+  return Desc.mayLoad() && !Desc.mayStore();
+}
+
+/// Get the chain operand of \p Node: its last operand that is not glue.
+static SDValue getChainOperand(SDNode *Node) {
+  // Loop past any trailing glue operands.
+  unsigned OpIndex = Node->getNumOperands();
+  while (OpIndex && Node->getOperand(OpIndex - 1).getValueType() == MVT::Glue)
+    --OpIndex;
+
+  assert(OpIndex && "expected at least one Operand after Glue operands!");
+  // OpIndex is either the index of the first trailing glue operand, or the
+  // total number of operands if there are no glue operands. The chain operand
+  // must be the previous operand.
+  SDValue ChainOp = Node->getOperand(OpIndex - 1);
+  assert(ChainOp.getValueType() == MVT::Other &&
+         "Expected Chain Operand on mayLoad MachineSDNode!");
+  return ChainOp;
+}
+
+bool PPCInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                           int64_t &Offset1,
+                                           int64_t &Offset2) const {
+  // Only interested in MachineSDNodes.
+  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+    return false;
+
+  const MCInstrDesc &MCIDesc1 = get(Load1->getMachineOpcode());
+  const MCInstrDesc &MCIDesc2 = get(Load2->getMachineOpcode());
+  // Only interested in 'real' loads.
+  if (!mayActuallyLoad(MCIDesc1) || MCIDesc1.isPseudo() ||
+      !mayActuallyLoad(MCIDesc2) || MCIDesc2.isPseudo())
+    return false;
+
+  // Only interested in loads on the same chain.
+  if (getChainOperand(Load1) != getChainOperand(Load2))
+    return false;
+
+  // Get the memory operands. Both nodes passed the isMachineOpcode() check
+  // above, so cast<> (which asserts on a type mismatch) is safe here.
+  MachineMemOperand *MemOp1 = extractMemOp(cast<MachineSDNode>(Load1));
+  MachineMemOperand *MemOp2 = extractMemOp(cast<MachineSDNode>(Load2));
+
+  // Not every load will have its MMO properly set. For example the loads
+  // created from intrinsic calls may not have them set.
+  if (!MemOp1 || !MemOp2)
+    return false;
+
+  // The loads only share a base when both memory operands name the same value.
+  // NOTE(review): two operands whose getValue() is null also compare equal
+  // here -- confirm that treating them as a common base is intended.
+  if (MemOp1->getValue() != MemOp2->getValue())
+    return false;
+
+  Offset1 = MemOp1->getOffset();
+  Offset2 = MemOp2->getOffset();
+  return true;
+}
+
+bool PPCInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                                           int64_t Offset1, int64_t Offset2,
+                                           unsigned NumLoads) const {
+  assert(Offset2 > Offset1 && "Offset2 must larger then Offset1!");
+
+  // Check that the loads are within a cacheline of each other.
+  if (Offset2 - Offset1 > 128)
+    return false;
+
+  // Vector and scalar loads have separate limits on how many loads may be
+  // clustered; both are controlled by hidden command-line options.
+  if (Load1->getValueType(0).isVector())
+    return NumLoads < VectorLoadClusterCount;
+
+  return NumLoads < ScalarLoadClusterCount;
+}
Index: test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
===================================================================
--- test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -31,11 +31,12 @@
 ; PPC64-P8: blr
 
 ; PPC32-DAG: stfd 1, 24(1)
-; PPC32-DAG: stfd 2, 16(1)
+; PPC32: nop
 ; PPC32: nop
 ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1)
-; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
 ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1)
+; PPC32-DAG: stfd 2, 16(1)
+; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
 ; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
 ; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0
 ; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]]
Index: test/CodeGen/PowerPC/jaggedstructs.ll
===================================================================
--- test/CodeGen/PowerPC/jaggedstructs.ll
+++ test/CodeGen/PowerPC/jaggedstructs.ll
@@ -35,10 +35,10 @@
 ; CHECK: sth {{[0-9]+}}, 70(1)
 ; CHECK: stw {{[0-9]+}}, 66(1)
 ; CHECK: lbz {{[0-9]+}}, 191(1)
-; CHECK: lhz {{[0-9]+}}, 189(1)
 ; CHECK: lwz {{[0-9]+}}, 185(1)
-; CHECK: stb {{[0-9]+}}, 79(1)
+; CHECK: lhz {{[0-9]+}}, 189(1)
 ; CHECK: sth {{[0-9]+}}, 77(1)
+; CHECK: stb {{[0-9]+}}, 79(1)
 ; CHECK: stw {{[0-9]+}}, 73(1)
 ; CHECK: ld 6, 72(1)
 ; CHECK: ld 5, 64(1)
Index: test/CodeGen/PowerPC/memcpy-vec.ll
===================================================================
--- test/CodeGen/PowerPC/memcpy-vec.ll
+++ test/CodeGen/PowerPC/memcpy-vec.ll
@@ -15,10 +15,10 @@
 ; PWR7-LABEL: @foo1
 ; PWR7-NOT: bl memcpy
 ; PWR7-DAG: li [[OFFSET:[0-9]+]], 16
-; PWR7-DAG: lxvd2x [[TMP0:[0-9]+]], 4, [[OFFSET]]
+; PWR7-DAG: lxvd2x [[TMP0:[0-9]+]], 0, 4
+; PWR7-DAG: lxvd2x [[TMP1:[0-9]+]], 4, [[OFFSET]]
+; PWR7-DAG: stxvd2x [[TMP1]], 3, [[OFFSET]]
 ; PWR7-DAG: stxvd2x [[TMP0]], 0, 3
-; PWR7-DAG: lxvd2x [[TMP1:[0-9]+]], 0, 4
-; PWR7-DAG: stxvd2x [[TMP1]], 0, 3
 ; PWR7: blr
 
 ; PWR8-LABEL: @foo1
Index: test/CodeGen/PowerPC/ppc32-vacopy.ll
===================================================================
--- test/CodeGen/PowerPC/ppc32-vacopy.ll
+++ test/CodeGen/PowerPC/ppc32-vacopy.ll
@@ -19,6 +19,6 @@
 ; CHECK: lwz [[REG1:[0-9]+]], {{.*}}
 ; CHECK: lwz [[REG2:[0-9]+]], {{.*}}
 ; CHECK: lwz [[REG3:[0-9]+]], {{.*}}
-; CHECK: stw [[REG1]], {{.*}}
-; CHECK: stw [[REG2]], {{.*}}
 ; CHECK: stw [[REG3]], {{.*}}
+; CHECK: stw [[REG2]], {{.*}}
+; CHECK: stw [[REG1]], {{.*}}
Index: test/CodeGen/PowerPC/ppcf128-endian.ll
===================================================================
--- test/CodeGen/PowerPC/ppcf128-endian.ll
+++ test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -27,8 +27,8 @@
 }
 ; CHECK: @caller
 ; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK: lfd 2, 8([[REG]])
 ; CHECK: lfd 1, 0([[REG]])
+; CHECK: lfd 2, 8([[REG]])
 ; CHECK: bl test
 
 declare void @test(ppc_fp128)