Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -53,6 +54,94 @@
 using namespace llvm;
 
+#define DEBUG_TYPE "mem_funcs"
+
+// Stats related to expansion of memcpy calls.
+STATISTIC(NumMemcpyCalls, "Count of all memcpy calls lowered");
+STATISTIC(NumMemcpyNonConstSize,
+          "memcpy calls whose size is not known at compile time");
+STATISTIC(NumMemcpyTooLarge,
+          "memcpy calls with a known size larger than the max we expand");
+STATISTIC(NumMemcpyTargetExpanded,
+          "memcpy calls expanded with the target specific expansion");
+// Alignment related stats for memcpy.
+STATISTIC(NumMemcpyUnaligned,
+          "memcpy calls whose operands have worst case alignment info");
+STATISTIC(NumMemcpyAligned16,
+          "memcpy calls whose operands are 16 byte aligned");
+// Size related stats for memcpy.
+STATISTIC(NumMemcpy8, "memcpy calls with size <= 8");
+STATISTIC(NumMemcpy32, "memcpy calls with 8 < size <= 32");
+STATISTIC(NumMemcpy128, "memcpy calls with 32 < size <= 128");
+STATISTIC(NumMemcpy384, "memcpy calls with 128 < size <= 384");
+STATISTIC(NumMemcpyXL, "memcpy calls with 384 < size");
+
+// Stats related to expansion of memmove calls.
+STATISTIC(NumMemmoveCalls, "Count of all memmove calls lowered");
+STATISTIC(NumMemmoveTooLarge,
+          "memmove calls with a known size larger than the max we expand");
+STATISTIC(NumMemmoveNonConstSize,
+          "memmove calls whose size is not known at compile time");
+STATISTIC(NumMemmoveTargetExpanded,
+          "memmove calls expanded with the target specific expansion");
+// Alignment related stats for memmove.
+STATISTIC(NumMemmoveUnaligned,
+          "memmove calls whose operands have worst case alignment info");
+STATISTIC(NumMemmoveAligned16,
+          "memmove calls whose operands are 16 byte aligned");
+// Size related stats for memmove.
+STATISTIC(NumMemmove8, "memmove calls with size <= 8");
+STATISTIC(NumMemmove32, "memmove calls with 8 < size <= 32");
+STATISTIC(NumMemmove128, "memmove calls with 32 < size <= 128");
+STATISTIC(NumMemmove384, "memmove calls with 128 < size <= 384");
+STATISTIC(NumMemmoveXL, "memmove calls with 384 < size");
+
+// Stats related to expansion of memset calls.
+STATISTIC(NumMemsetCalls, "Count of all memset calls lowered");
+STATISTIC(NumMemsetNonConstSize,
+          "memset calls whose size is not known at compile time");
+STATISTIC(NumMemsetTooLarge,
+          "memset calls with a known size larger than the max we expand");
+STATISTIC(NumMemsetTargetExpanded,
+          "memset calls expanded with the target specific expansion");
+// Alignment related stats for memset.
+STATISTIC(NumMemsetUnaligned,
+          "memset calls whose operands have worst case alignment info");
+STATISTIC(NumMemsetAligned16,
+          "memset calls whose operands are 16 byte aligned");
+// Size related stats for memset.
+STATISTIC(NumMemset8, "memset calls with size <= 8");
+STATISTIC(NumMemset32, "memset calls with 8 < size <= 32");
+STATISTIC(NumMemset128, "memset calls with 32 < size <= 128");
+STATISTIC(NumMemset384, "memset calls with 128 < size <= 384");
+STATISTIC(NumMemsetXL, "memset calls with 384 < size");
+
+// Bucket a call's alignment into the unaligned / vector-aligned counters.
+static void memAlignStat(unsigned Align, Statistic *Unaligned,
+                         Statistic *VectorAligned) {
+  if (Align == 0 || Align == 1)
+    *Unaligned += 1;
+  else if (Align >= 16)
+    *VectorAligned += 1;
+}
+
+// Bucket a call's constant size into the per-size-range counters.
+static void memSizeStat(uint64_t Size, Statistic *Size8, Statistic *Size32,
+                        Statistic *Size128, Statistic *Size384,
+                        Statistic *SizeXL) {
+  if (Size <= 8)
+    *Size8 += 1;
+  else if (Size <= 32)
+    *Size32 += 1;
+  else if (Size <= 128)
+    *Size128 += 1;
+  else if (Size <= 384)
+    *Size384 += 1;
+  else
+    *SizeXL += 1;
+}
+
 /// makeVTList - Return an instance of the SDVTList struct initialized with the
 /// specified members.
 static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
@@ -4685,8 +4774,12 @@
                                 false, false, CopyFromStr, true,
                                 DstPtrInfo.getAddrSpace(),
                                 SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+                                DAG, TLI)) {
+    // If we failed to find an optimal memory-op lowering, it's because the
+    // expansion is too large.
+    ++NumMemcpyTooLarge;
     return SDValue();
+  }
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
@@ -4797,8 +4890,12 @@
                                 false, false, false, false,
                                 DstPtrInfo.getAddrSpace(),
                                 SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+                                DAG, TLI)) {
+    // If we failed to find an optimal memory-op lowering, it's because the
+    // expansion is too large.
+    ++NumMemmoveTooLarge;
     return SDValue();
+  }
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
@@ -4890,8 +4987,12 @@
                                 Size, (DstAlignCanChange ? 0 : Align), 0,
                                 true, IsZeroVal, false, true,
                                 DstPtrInfo.getAddrSpace(), ~0u,
-                                DAG, TLI))
+                                DAG, TLI)) {
+    // If we failed to find an optimal memory-op lowering, it's because the
+    // expansion is too large.
+    ++NumMemsetTooLarge;
     return SDValue();
+  }
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
@@ -4965,6 +5066,9 @@
                                 MachinePointerInfo SrcPtrInfo) {
   assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
 
+  ++NumMemcpyCalls;
+  memAlignStat(Align, &NumMemcpyUnaligned, &NumMemcpyAligned16);
+
   // Check to see if we should lower the memcpy to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -4973,11 +5077,17 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
+    uint64_t Size = ConstantSize->getZExtValue();
+    memSizeStat(Size, &NumMemcpy8, &NumMemcpy32, &NumMemcpy128,
+                &NumMemcpy384, &NumMemcpyXL);
     SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
-                                             ConstantSize->getZExtValue(),Align,
-                                             isVol, false, DstPtrInfo, SrcPtrInfo);
+                                             Size, Align, isVol, false,
+                                             DstPtrInfo, SrcPtrInfo);
     if (Result.getNode())
       return Result;
+  } else {
+    // Size isn't known at compile time.
+    ++NumMemcpyNonConstSize;
   }
 
   // Then check to see if we should lower the memcpy with target-specific
@@ -4986,8 +5096,10 @@
     SDValue Result = TSI->EmitTargetCodeForMemcpy(
         *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
         DstPtrInfo, SrcPtrInfo);
-    if (Result.getNode())
+    if (Result.getNode()) {
+      ++NumMemcpyTargetExpanded;
       return Result;
+    }
   }
 
   // If we really need inline code and the target declined to provide it,
@@ -5037,6 +5149,8 @@
                                  MachinePointerInfo DstPtrInfo,
                                  MachinePointerInfo SrcPtrInfo) {
   assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+  ++NumMemmoveCalls;
+  memAlignStat(Align, &NumMemmoveUnaligned, &NumMemmoveAligned16);
 
   // Check to see if we should lower the memmove to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
@@ -5046,12 +5160,17 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
+    uint64_t Size = ConstantSize->getZExtValue();
+    memSizeStat(Size, &NumMemmove8, &NumMemmove32, &NumMemmove128,
+                &NumMemmove384, &NumMemmoveXL);
     SDValue Result = getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
-                                              ConstantSize->getZExtValue(), Align, isVol,
-                                              false, DstPtrInfo, SrcPtrInfo);
+                                              Size, Align, isVol, false, DstPtrInfo,
+                                              SrcPtrInfo);
     if (Result.getNode())
       return Result;
+  } else {
+    // Size isn't known at compile time.
+    ++NumMemmoveNonConstSize;
   }
 
   // Then check to see if we should lower the memmove with target-specific
@@ -5059,8 +5178,10 @@
   if (TSI) {
     SDValue Result = TSI->EmitTargetCodeForMemmove(
         *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
-    if (Result.getNode())
+    if (Result.getNode()) {
+      ++NumMemmoveTargetExpanded;
       return Result;
+    }
   }
 
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
@@ -5097,6 +5218,8 @@
                                bool isVol, bool isTailCall,
                                MachinePointerInfo DstPtrInfo) {
   assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+  ++NumMemsetCalls;
+  memAlignStat(Align, &NumMemsetUnaligned, &NumMemsetAligned16);
 
   // Check to see if we should lower the memset to stores first.
   // For cases within the target-specified limits, this is the best choice.
@@ -5106,12 +5229,17 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
+    uint64_t Size = ConstantSize->getZExtValue();
+    memSizeStat(Size, &NumMemset8, &NumMemset32, &NumMemset128,
+                &NumMemset384, &NumMemsetXL);
     SDValue Result =
-        getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
-                        Align, isVol, DstPtrInfo);
+        getMemsetStores(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+                        DstPtrInfo);
 
     if (Result.getNode())
       return Result;
+  } else {
+    // Size isn't known at compile time.
+    ++NumMemsetNonConstSize;
   }
 
   // Then check to see if we should lower the memset with target-specific
@@ -5119,8 +5247,10 @@
   if (TSI) {
     SDValue Result = TSI->EmitTargetCodeForMemset(
         *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
-    if (Result.getNode())
+    if (Result.getNode()) {
+      ++NumMemsetTargetExpanded;
       return Result;
+    }
   }
 
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -73,6 +73,13 @@
 
 #define DEBUG_TYPE "isel"
 
+STATISTIC(NumMemcmpCalls, "Count of all memcmp calls lowered");
+STATISTIC(NumMemcmpNonConstSize,
+          "memcmp calls whose size is not known at compile time");
+STATISTIC(NumSmallMemcmp, "memcmp calls with size <= 32");
+STATISTIC(NumMedMemcmp, "memcmp calls with 32 < size <= 128");
+STATISTIC(NumLargeMemcmp, "memcmp calls with 128 < size");
+
 /// LimitFloatPrecision - Generate low-precision inline sequences for
 /// some float libcalls (6, 8 or 12 bits).
 static unsigned LimitFloatPrecision;
@@ -6058,6 +6065,8 @@
 /// If so, return true and lower it, otherwise return false and it will be
 /// lowered like a normal call.
 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+  ++NumMemcmpCalls;
+
   // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
   if (I.getNumArgOperands() != 3)
     return false;
@@ -6077,6 +6086,18 @@
     return true;
   }
 
+  if (!CSize) {
+    ++NumMemcmpNonConstSize;
+  } else {
+    uint64_t CmpSize = CSize->getZExtValue();
+    if (CmpSize <= 32)
+      ++NumSmallMemcmp;
+    else if (CmpSize <= 128)
+      ++NumMedMemcmp;
+    else
+      ++NumLargeMemcmp;
+  }
+
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
   std::pair<SDValue, SDValue> Res =
       TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -102,7 +102,8 @@
 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 
 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
-cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+cl::desc("disable setting the node scheduling preference to ILP on PPC"),
+  cl::Hidden);
 
 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
@@ -110,6 +111,18 @@
 static cl::opt<bool> DisableSCO("disable-ppc-sco",
 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
 
+static cl::opt<unsigned> OpMaxStoresPerMemset("max-stores-memset",
+  cl::desc("Set the maximum number of stores in a memset expansion"),
+  cl::Hidden, cl::init(8U));
+
+static cl::opt<unsigned> OpMaxStoresPerMemcpy("max-stores-memcpy",
+  cl::desc("Set the maximum number of load/store pairs in a memcpy expansion"),
+  cl::Hidden, cl::init(8U));
+
+static cl::opt<unsigned> OpMaxStoresPerMemmove("max-stores-memmove",
+  cl::desc("Set the maximum number of load/store pairs in a memmove expansion"),
+  cl::Hidden, cl::init(8U));
+
 STATISTIC(NumTailCalls, "Number of tail calls");
 STATISTIC(NumSiblingCalls, "Number of sibling calls");
 
@@ -147,6 +160,7 @@
   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
@@ -1020,6 +1034,10 @@
     MaxStoresPerMemset = 128;
     MaxStoresPerMemcpy = 128;
     MaxStoresPerMemmove = 128;
+  } else {
+    MaxStoresPerMemset = OpMaxStoresPerMemset;
+    MaxStoresPerMemcpy = OpMaxStoresPerMemcpy;
+    MaxStoresPerMemmove = OpMaxStoresPerMemmove;
   }
 }
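
Note on exercising the patch: STATISTIC counters are compiled in only for
assertions-enabled builds of LLVM and are printed via the standard -stats
flag. A minimal sketch of how the new counters and options could be driven,
assuming an input module memfuncs.ll (a hypothetical file name) that contains
llvm.memcpy/llvm.memmove/llvm.memset intrinsics and memcmp calls:

  # Print the new mem-function counters for a PowerPC compile.
  llc -mtriple=powerpc64le-unknown-linux-gnu -stats memfuncs.ll -o /dev/null

  # Raise the memcpy inline-expansion limit added by this patch and compare
  # how the size-bucket and TooLarge counters shift.
  llc -mtriple=powerpc64le-unknown-linux-gnu -max-stores-memcpy=32 -stats \
      memfuncs.ll -o /dev/null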