Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -53,6 +54,83 @@
 using namespace llvm;
 
+#define DEBUG_TYPE "mem_funcs"
+
+// Stats related to expansion of memcpy calls.
+STATISTIC(NumMemcpyCalls, "Count of all memcpy calls lowered");
+STATISTIC(NumMemcpyNonConstSize,
+          "memcpy calls whose size is not known at compile time");
+STATISTIC(NumMemcpyTooLarge,
+          "memcpy calls with a known size larger than the max we expand");
+STATISTIC(NumMemCpyTargetExpanded,
+          "memcpy calls expanded with the target specific expansion");
+// Alignment related stats for memcpy.
+STATISTIC(NumMemcpyUnaligned,
+          "memcpy calls whose operands have worst case alignment info");
+STATISTIC(NumMemcpyAligned16,
+          "memcpy calls whose operands are 16 byte aligned");
+// Size related stats for memcpy.
+STATISTIC(NumSmallMemcpy, "memcpy calls with size <= 32");
+STATISTIC(NumMedMemcpy, "memcpy calls with 32 < size <= 128");
+STATISTIC(NumLargeMemcpy, "memcpy calls with 128 < size");
+
+// Stats related to expansion of memmove calls.
+STATISTIC(NumMemmoveCalls, "Count of all memmove calls lowered");
+STATISTIC(NumMemmoveTooLarge,
+          "memmove calls with a known size larger than the max we expand");
+STATISTIC(NumMemmoveNonConstSize,
+          "memmove calls whose size is not known at compile time");
+STATISTIC(NumMemmoveTargetExpanded,
+          "memmove calls expanded with the target specific expansion");
+// Alignment related stats for memmove.
+STATISTIC(NumMemmoveUnaligned,
+          "memmove calls whose operands have worst case alignment info");
+STATISTIC(NumMemmoveAligned16,
+          "memmove calls whose operands are 16 byte aligned");
+// Size related stats for memmove.
+STATISTIC(NumSmallMemmove, "memmove calls with size <= 32");
+STATISTIC(NumMedMemmove, "memmove calls with 32 < size <= 128");
+STATISTIC(NumLargeMemmove, "memmove calls with 128 < size");
+
+// Stats related to expansion of memset calls.
+STATISTIC(NumMemsetCalls, "Count of all memset calls lowered");
+STATISTIC(NumMemsetNonConstSize,
+          "memset calls whose size is not known at compile time");
+STATISTIC(NumMemsetTooLarge,
+          "memset calls with a known size larger than the max we expand");
+STATISTIC(NumMemsetTargetExpanded,
+          "memset calls expanded with the target specific expansion");
+// Alignment related stats for memset.
+STATISTIC(NumMemsetUnaligned,
+          "memset calls that have worst case alignment info");
+STATISTIC(NumMemsetAligned16,
+          "memset calls whose operand is 16 byte aligned");
+// Size related stats for memset.
+STATISTIC(NumSmallMemset, "memset calls with size <= 32");
+STATISTIC(NumMedMemset, "memset calls with 32 < size <= 128");
+STATISTIC(NumLargeMemset, "memset calls with 128 < size");
+
+static void
+memAlignStat(unsigned Align, Statistic * const Unaligned,
+             Statistic * const VectorAligned) {
+  if (Align == 0 || Align == 1)
+    *Unaligned += 1;
+  else if (Align == 16)
+    *VectorAligned += 1;
+}
+
+static void
+memSizeStat(unsigned Size, Statistic * const Small, Statistic * const Med,
+            Statistic * const Large) {
+  if (Size <= 32)
+    *Small += 1;
+  else if (Size <= 128)
+    *Med += 1;
+  else
+    *Large += 1;
+}
+
 /// makeVTList - Return an instance of the SDVTList struct initialized with the
 /// specified members.
 static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
@@ -4685,8 +4763,12 @@
                                 false, false, CopyFromStr, true,
                                 DstPtrInfo.getAddrSpace(),
                                 SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+                                DAG, TLI)) {
+    // If we failed to find an optimal memory-op lowering, it's because the
+    // expansion is too large.
+    ++NumMemcpyTooLarge;
     return SDValue();
+  }
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
@@ -4797,8 +4879,12 @@
                                 false, false, false, false,
                                 DstPtrInfo.getAddrSpace(),
                                 SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+                                DAG, TLI)) {
+    // If we failed to find an optimal memory-op lowering, it's because the
+    // expansion is too large.
+    ++NumMemmoveTooLarge;
     return SDValue();
+  }
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
@@ -4890,8 +4976,12 @@
                                 Size, (DstAlignCanChange ? 0 : Align), 0,
                                 true, IsZeroVal, false, true,
                                 DstPtrInfo.getAddrSpace(), ~0u,
-                                DAG, TLI))
+                                DAG, TLI)) {
+    // If we failed to find an optimal memory-op lowering, it's because the
+    // expansion is too large.
+    ++NumMemsetTooLarge;
     return SDValue();
+  }
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
@@ -4965,6 +5055,9 @@
                                 MachinePointerInfo SrcPtrInfo) {
   assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
 
+  ++NumMemcpyCalls;
+  memAlignStat(Align, &NumMemcpyUnaligned, &NumMemcpyAligned16);
+
   // Check to see if we should lower the memcpy to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -4973,11 +5066,16 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
+    uint64_t Size = ConstantSize->getZExtValue();
+    memSizeStat(Size, &NumSmallMemcpy, &NumMedMemcpy, &NumLargeMemcpy);
     SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
-                                             ConstantSize->getZExtValue(),Align,
-                                             isVol, false, DstPtrInfo, SrcPtrInfo);
+                                             Size, Align, isVol, false,
+                                             DstPtrInfo, SrcPtrInfo);
     if (Result.getNode())
       return Result;
+  } else {
+    // Size isn't known at compile time.
+    ++NumMemcpyNonConstSize;
   }
 
   // Then check to see if we should lower the memcpy with target-specific
@@ -4986,8 +5084,10 @@
     SDValue Result = TSI->EmitTargetCodeForMemcpy(
         *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
         DstPtrInfo, SrcPtrInfo);
-    if (Result.getNode())
+    if (Result.getNode()) {
+      ++NumMemCpyTargetExpanded;
       return Result;
+    }
   }
 
   // If we really need inline code and the target declined to provide it,
@@ -5037,6 +5137,8 @@
                                  MachinePointerInfo DstPtrInfo,
                                  MachinePointerInfo SrcPtrInfo) {
   assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+  ++NumMemmoveCalls;
+  memAlignStat(Align, &NumMemmoveUnaligned, &NumMemmoveAligned16);
 
   // Check to see if we should lower the memmove to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
@@ -5046,12 +5148,16 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
+    uint64_t Size = ConstantSize->getZExtValue();
+    memSizeStat(Size, &NumSmallMemmove, &NumMedMemmove, &NumLargeMemmove);
     SDValue Result = getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
-                                              ConstantSize->getZExtValue(), Align, isVol,
-                                              false, DstPtrInfo, SrcPtrInfo);
+                                              Size, Align, isVol, false, DstPtrInfo,
+                                              SrcPtrInfo);
     if (Result.getNode())
       return Result;
+  } else {
+    ++NumMemmoveNonConstSize;
   }
 
   // Then check to see if we should lower the memmove with target-specific
@@ -5059,8 +5165,10 @@
   if (TSI) {
     SDValue Result = TSI->EmitTargetCodeForMemmove(
         *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
-    if (Result.getNode())
+    if (Result.getNode()) {
+      ++NumMemmoveTargetExpanded;
       return Result;
+    }
   }
 
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
@@ -5097,6 +5205,8 @@
                                  bool isVol, bool isTailCall,
                                  MachinePointerInfo DstPtrInfo) {
   assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+  ++NumMemsetCalls;
+  memAlignStat(Align, &NumMemsetUnaligned, &NumMemsetAligned16);
 
   // Check to see if we should lower the memset to stores first.
   // For cases within the target-specified limits, this is the best choice.
@@ -5106,12 +5216,16 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
+    uint64_t Size = ConstantSize->getZExtValue();
+    memSizeStat(Size, &NumSmallMemset, &NumMedMemset, &NumLargeMemset);
     SDValue Result =
-        getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
-                        Align, isVol, DstPtrInfo);
+        getMemsetStores(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+                        DstPtrInfo);
     if (Result.getNode())
       return Result;
+  } else {
+    ++NumMemsetNonConstSize;
   }
 
   // Then check to see if we should lower the memset with target-specific
@@ -5119,8 +5233,10 @@
   if (TSI) {
     SDValue Result = TSI->EmitTargetCodeForMemset(
         *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
-    if (Result.getNode())
+    if (Result.getNode()) {
+      ++NumMemsetTargetExpanded;
       return Result;
+    }
   }
 
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -73,6 +73,13 @@
 #define DEBUG_TYPE "isel"
 
+STATISTIC(NumMemcmpCalls, "Count of all memcmp calls lowered");
+STATISTIC(NumMemcmpNonConstSize,
+          "memcmp calls whose size is not known at compile time");
+STATISTIC(NumSmallMemcmp, "memcmp calls with size <= 32");
+STATISTIC(NumMedMemcmp, "memcmp calls with 32 < size <= 128");
+STATISTIC(NumLargeMemcmp, "memcmp calls with 128 < size");
+
 /// LimitFloatPrecision - Generate low-precision inline sequences for
 /// some float libcalls (6, 8 or 12 bits).
 static unsigned LimitFloatPrecision;
@@ -6058,6 +6065,8 @@
 /// If so, return true and lower it, otherwise return false and it will be
 /// lowered like a normal call.
 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+  ++NumMemcmpCalls;
+
   // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
   if (I.getNumArgOperands() != 3)
     return false;
 
@@ -6077,6 +6086,18 @@
     return true;
   }
 
+  if (!CSize) {
+    ++NumMemcmpNonConstSize;
+  } else {
+    uint64_t CmpSize = CSize->getZExtValue();
+    if (CmpSize <= 32)
+      ++NumSmallMemcmp;
+    else if (CmpSize <= 128)
+      ++NumMedMemcmp;
+    else
+      ++NumLargeMemcmp;
+  }
+
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
   std::pair<SDValue, SDValue> Res =
       TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -102,7 +102,8 @@
 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 
 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
-cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+cl::desc("disable setting the node scheduling preference to ILP on PPC"),
+         cl::Hidden);
 
 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
@@ -110,6 +111,18 @@
 static cl::opt<bool> DisableSCO("disable-ppc-sco",
 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
 
+static cl::opt<unsigned> OpMaxStoresPerMemset("max-stores-memset",
+cl::desc("Set the maximum number of stores in a memset expansion"),
+cl::Hidden, cl::init(8U));
+
+static cl::opt<unsigned> OpMaxStoresPerMemcpy("max-stores-memcpy",
+cl::desc("Set the maximum number of Load/Store pairs in a memcpy expansion"),
+cl::Hidden, cl::init(8U));
+
+static cl::opt<unsigned> OpMaxStoresPerMemmove("max-stores-memmove",
+cl::desc("Set the maximum number of Load/Store pairs in a memmove expansion"),
+cl::Hidden, cl::init(8U));
+
 STATISTIC(NumTailCalls, "Number of tail calls");
 STATISTIC(NumSiblingCalls, "Number of sibling calls");
 
@@ -147,6 +160,7 @@
   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
 
@@ -1020,6 +1034,10 @@
     MaxStoresPerMemset = 128;
     MaxStoresPerMemcpy = 128;
     MaxStoresPerMemmove = 128;
+  } else {
+    MaxStoresPerMemset = OpMaxStoresPerMemset;
+    MaxStoresPerMemcpy = OpMaxStoresPerMemcpy;
+    MaxStoresPerMemmove = OpMaxStoresPerMemmove;
   }
 }
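
Usage note (not part of the patch; a hedged sketch). In a build with LLVM statistics enabled (for example an assertions-enabled build), the counters added here are printed by the usual -stats machinery, listed under the "mem_funcs" and "isel" statistic groups, and the PPC expansion limits can be overridden with the new flags defined above. A hypothetical invocation, where test.ll stands in for any input containing llvm.memcpy/llvm.memmove/llvm.memset calls, might look like:

  llc -mtriple=powerpc64le-unknown-linux-gnu -stats \
      -max-stores-memset=4 -max-stores-memcpy=4 -max-stores-memmove=4 \
      test.ll -o /dev/null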