Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -2920,6 +2920,20 @@ } }; + /// Determines the optimal series of memory ops to replace the memset / memcpy. + /// Return true if the number of memory ops is below the threshold (Limit). + /// It returns the types of the sequence of memory ops to perform + /// memset / memcpy by reference. + bool findOptimalMemOpLowering(std::vector &MemOps, + unsigned Limit, uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + bool AllowOverlap, + unsigned DstAS, unsigned SrcAS, + const Function &F) const; + /// Check to see if the specified operand of the specified instruction is a /// constant integer. If so, check to see if there are any bits set in the /// constant that are not demanded. If so, shrink the constant and return Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5546,111 +5546,6 @@ SrcDelta + G->getOffset()); } -/// Determines the optimal series of memory ops to replace the memset / memcpy. -/// Return true if the number of memory ops is below the threshold (Limit). -/// It returns the types of the sequence of memory ops to perform -/// memset / memcpy by reference. -static bool FindOptimalMemOpLowering(std::vector &MemOps, - unsigned Limit, uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, - bool ZeroMemset, - bool MemcpyStrSrc, - bool AllowOverlap, - unsigned DstAS, unsigned SrcAS, - SelectionDAG &DAG, - const TargetLowering &TLI) { - assert((SrcAlign == 0 || SrcAlign >= DstAlign) && - "Expecting memcpy / memset source to meet alignment requirement!"); - // If 'SrcAlign' is zero, that means the memory operation does not need to - // load the value, i.e. memset or memcpy from constant string. Otherwise, - // it's the inferred alignment of the source. 'DstAlign', on the other hand, - // is the specified alignment of the memory operation. If it is zero, that - // means it's possible to change the alignment of the destination. - // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does - // not need to be loaded. - const Function &F = DAG.getMachineFunction().getFunction(); - EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsMemset, ZeroMemset, MemcpyStrSrc, - F.getAttributes()); - - if (VT == MVT::Other) { - // Use the largest integer type whose alignment constraints are satisfied. - // We only need to check DstAlign here as SrcAlign is always greater or - // equal to DstAlign (or zero). - VT = MVT::i64; - while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && - !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - assert(VT.isInteger()); - - // Find the largest legal integer type. - MVT LVT = MVT::i64; - while (!TLI.isTypeLegal(LVT)) - LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); - assert(LVT.isInteger()); - - // If the type we've chosen is larger than the largest legal integer type - // then use that instead. - if (VT.bitsGT(LVT)) - VT = LVT; - } - - unsigned NumMemOps = 0; - while (Size != 0) { - unsigned VTSize = VT.getSizeInBits() / 8; - while (VTSize > Size) { - // For now, only use non-vector load / store's for the left-over pieces. - EVT NewVT = VT; - unsigned NewVTSize; - - bool Found = false; - if (VT.isVector() || VT.isFloatingPoint()) { - NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; - if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && - TLI.isSafeMemOpType(NewVT.getSimpleVT())) - Found = true; - else if (NewVT == MVT::i64 && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && - TLI.isSafeMemOpType(MVT::f64)) { - // i64 is usually not legal on 32-bit targets, but f64 may be. - NewVT = MVT::f64; - Found = true; - } - } - - if (!Found) { - do { - NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); - if (NewVT == MVT::i8) - break; - } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); - } - NewVTSize = NewVT.getSizeInBits() / 8; - - // If the new VT cannot cover all of the remaining bits, then consider - // issuing a (or a pair of) unaligned and overlapping load / store. - bool Fast; - if (NumMemOps && AllowOverlap && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && - Fast) - VTSize = Size; - else { - VT = NewVT; - VTSize = NewVTSize; - } - } - - if (++NumMemOps > Limit) - return false; - - MemOps.push_back(VT); - Size -= VTSize; - } - - return true; -} - static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. @@ -5717,13 +5612,13 @@ bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - (isZeroConstant ? 0 : SrcAlign), - false, false, CopyFromConstant, true, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - DAG, TLI)) + if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), + (isZeroConstant ? 0 : SrcAlign), + false, false, CopyFromConstant, true, + DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), + MF.getFunction())) return SDValue(); if (DstAlignCanChange) { @@ -5898,12 +5793,12 @@ SrcAlign = Align; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), SrcAlign, - false, false, false, false, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - DAG, TLI)) + if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), SrcAlign, + false, false, false, false, + DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), + MF.getFunction())) return SDValue(); if (DstAlignCanChange) { @@ -5998,11 +5893,11 @@ DstAlignCanChange = true; bool IsZeroVal = isa(Src) && cast(Src)->isNullValue(); - if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), - Size, (DstAlignCanChange ? 0 : Align), 0, - true, IsZeroVal, false, true, - DstPtrInfo.getAddrSpace(), ~0u, - DAG, TLI)) + if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), + Size, (DstAlignCanChange ? 0 : Align), 0, + true, IsZeroVal, false, true, + DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction())) return SDValue(); if (DstAlignCanChange) { Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -153,6 +153,107 @@ return LowerCallTo(CLI); } +bool +TargetLowering::findOptimalMemOpLowering(std::vector &MemOps, + unsigned Limit, uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + bool AllowOverlap, + unsigned DstAS, unsigned SrcAS, + const Function &F) const { + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. + if (!(SrcAlign == 0 || SrcAlign >= DstAlign)) + return false; + + EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign, + IsMemset, ZeroMemset, MemcpyStrSrc, + F.getAttributes()); + + if (VT == MVT::Other) { + // Use the largest integer type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + VT = MVT::i64; + while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && + !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + assert(VT.isInteger()); + + // Find the largest legal integer type. + MVT LVT = MVT::i64; + while (!isTypeLegal(LVT)) + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); + assert(LVT.isInteger()); + + // If the type we've chosen is larger than the largest legal integer type + // then use that instead. + if (VT.bitsGT(LVT)) + VT = LVT; + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned VTSize = VT.getSizeInBits() / 8; + while (VTSize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + EVT NewVT = VT; + unsigned NewVTSize; + + bool Found = false; + if (VT.isVector() || VT.isFloatingPoint()) { + NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; + if (isOperationLegalOrCustom(ISD::STORE, NewVT) && + isSafeMemOpType(NewVT.getSimpleVT())) + Found = true; + else if (NewVT == MVT::i64 && + isOperationLegalOrCustom(ISD::STORE, MVT::f64) && + isSafeMemOpType(MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + Found = true; + } + } + + if (!Found) { + do { + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (NewVT == MVT::i8) + break; + } while (!isSafeMemOpType(NewVT.getSimpleVT())); + } + NewVTSize = NewVT.getSizeInBits() / 8; + + // If the new VT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + bool Fast; + if (NumMemOps && AllowOverlap && NewVTSize < Size && + allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && + Fast) + VTSize = Size; + else { + VT = NewVT; + VTSize = NewVTSize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(VT); + Size -= VTSize; + } + + return true; +} + /// Soften the operands of a comparison. This code is shared among BR_CC, /// SELECT_CC, and SETCC handlers. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,