diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -106,6 +106,49 @@ } // end namespace Sched +// MemOp models a memory operation, either memset or memcpy/memmove. +struct MemOp { + // Shared + uint64_t Size; + unsigned DstAlign; // Specified alignment of the memory operation or zero if + // destination alignment can satisfy any constraint. + bool AllowOverlap; + // memset only + bool IsMemset; // If set, this memory operation is a memset. + bool ZeroMemset; // If set, clears out memory with zeros. + // memcpy only + bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register + // constant so it does not need to be loaded. + unsigned SrcAlign; // Inferred alignment of the source or zero if the memory + // operation does not need to load the value. + + static MemOp Copy(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign, + unsigned SrcAlign, bool IsVolatile, + bool MemcpyStrSrc = false) { + return { + /*.Size =*/Size, + /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign, + /*.AllowOverlap =*/!IsVolatile, + /*.IsMemset =*/false, + /*.ZeroMemset =*/false, + /*.MemcpyStrSrc =*/MemcpyStrSrc, + /*.SrcAlign =*/SrcAlign, + }; + } + static MemOp Set(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign, + bool IsZeroMemset, bool IsVolatile) { + return { + /*.Size =*/Size, + /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign, + /*.AllowOverlap =*/!IsVolatile, + /*.IsMemset =*/true, + /*.ZeroMemset =*/IsZeroMemset, + /*.MemcpyStrSrc =*/false, + /*.SrcAlign =*/0, + }; + } +}; + /// This base class for TargetLowering contains the SelectionDAG-independent /// parts that can be used from the rest of CodeGen. class TargetLoweringBase { @@ -1518,29 +1561,17 @@ /// Returns the target specific optimal type for load and store operations as /// a result of memset, memcpy, and memmove lowering. 
- /// - /// If DstAlign is zero that means it's safe to destination alignment can - /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't - /// a need to check it against alignment requirement, probably because the - /// source does not need to be loaded. If 'IsMemset' is true, that means it's - /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of - /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it - /// does not need to be loaded. It returns EVT::Other if the type should be - /// determined using generic target-independent logic. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/, - unsigned /*SrcAlign*/, bool /*IsMemset*/, - bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, + getOptimalMemOpType(const MemOp &Op, const AttributeList & /*FuncAttributes*/) const { return MVT::Other; } - /// LLT returning variant. virtual LLT - getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/, - unsigned /*SrcAlign*/, bool /*IsMemset*/, - bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, + getOptimalMemOpLLT(const MemOp &Op, const AttributeList & /*FuncAttributes*/) const { return LLT(); } @@ -3102,14 +3133,8 @@ /// Return true if the number of memory ops is below the threshold (Limit). /// It returns the types of the sequence of memory ops to perform /// memset / memcpy by reference. 
- bool findOptimalMemOpLowering(std::vector &MemOps, - unsigned Limit, uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, - bool ZeroMemset, - bool MemcpyStrSrc, - bool AllowOverlap, - unsigned DstAS, unsigned SrcAS, + bool findOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, + const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const; /// Check to see if the specified operand of the specified instruction is a diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -855,37 +855,30 @@ // Returns a list of types to use for memory op lowering in MemOps. A partial // port of findOptimalMemOpLowering in TargetLowering. -static bool findGISelOptimalMemOpLowering( - std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - bool AllowOverlap, unsigned DstAS, unsigned SrcAS, - const AttributeList &FuncAttributes, const TargetLowering &TLI) { - // If 'SrcAlign' is zero, that means the memory operation does not need to - // load the value, i.e. memset or memcpy from constant string. Otherwise, - // it's the inferred alignment of the source. 'DstAlign', on the other hand, - // is the specified alignment of the memory operation. If it is zero, that - // means it's possible to change the alignment of the destination. - // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does - // not need to be loaded. 
- if (SrcAlign != 0 && SrcAlign < DstAlign) +static bool findGISelOptimalMemOpLowering(std::vector &MemOps, + unsigned Limit, const MemOp &Op, + unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes, + const TargetLowering &TLI) { + if (Op.SrcAlign != 0 && Op.SrcAlign < Op.DstAlign) return false; - LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset, - ZeroMemset, MemcpyStrSrc, FuncAttributes); + LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); if (Ty == LLT()) { // Use the largest scalar type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or // equal to DstAlign (or zero). Ty = LLT::scalar(64); - while (DstAlign && DstAlign < Ty.getSizeInBytes() && - !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign)) + while (Op.DstAlign && Op.DstAlign < Ty.getSizeInBytes() && + !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.DstAlign)) Ty = LLT::scalar(Ty.getSizeInBytes()); assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); // FIXME: check for the largest legal type we can load/store to. } unsigned NumMemOps = 0; + auto Size = Op.Size; while (Size != 0) { unsigned TySize = Ty.getSizeInBytes(); while (TySize > Size) { @@ -904,9 +897,9 @@ bool Fast; // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). MVT VT = getMVTForLLT(Ty); - if (NumMemOps && AllowOverlap && NewTySize < Size && + if (NumMemOps && Op.AllowOverlap && NewTySize < Size && TLI.allowsMisalignedMemoryAccesses( - VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) && + VT, DstAS, Op.DstAlign, MachineMemOperand::MONone, &Fast) && Fast) TySize = Size; else { @@ -988,12 +981,13 @@ auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; - if (!findGISelOptimalMemOpLowering( - MemOps, Limit, KnownLen, (DstAlignCanChange ? 
0 : Align), 0, - /*IsMemset=*/true, - /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes(), TLI)) + if (!findGISelOptimalMemOpLowering(MemOps, Limit, + MemOp::Set(KnownLen, DstAlignCanChange, + Align, + /*IsZeroMemset=*/IsZeroVal, + /*IsVolatile=*/IsVolatile), + DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes(), TLI)) return false; if (DstAlignCanChange) { @@ -1107,12 +1101,11 @@ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); if (!findGISelOptimalMemOpLowering( - MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), - SrcAlign, - /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + MemOps, Limit, + MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, + IsVolatile), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes(), TLI)) return false; if (DstAlignCanChange) { @@ -1214,12 +1207,11 @@ // to a bug in it's findOptimalMemOpLowering implementation. For now do the // same thing here. if (!findGISelOptimalMemOpLowering( - MemOps, Limit, KnownLen, (DstAlignCanChange ? 
0 : Alignment), - SrcAlign, - /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + MemOps, Limit, + MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, + /*IsVolatile*/ true), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes(), TLI)) return false; if (DstAlignCanChange) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5908,12 +5908,12 @@ bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - + MemOp Op = isZeroConstant ? MemOp::Set(Size, DstAlignCanChange, Alignment, + /*IsZeroMemset*/ true, isVol) + : MemOp::Copy(Size, DstAlignCanChange, Alignment, + SrcAlign, isVol, CopyFromConstant); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), - (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + MemOps, Limit, Op, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); @@ -6088,14 +6088,11 @@ if (Align > SrcAlign) SrcAlign = Align; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in - // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the - // correct code. - bool AllowOverlap = false; if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), SrcAlign, - /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MemOps, Limit, + MemOp::Copy(Size, DstAlignCanChange, Align, SrcAlign, + /*IsVolatile*/ true), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); @@ -6193,11 +6190,9 @@ bool IsZeroVal = isa(Src) && cast(Src)->isNullValue(); if (!TLI.findOptimalMemOpLowering( - MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, - (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, - /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes())) + MemOps, TLI.getMaxStoresPerMemset(OptSize), + MemOp::Set(Size, DstAlignCanChange, Align, IsZeroVal, isVol), + DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -176,16 +176,9 @@ return LowerCallTo(CLI); } -bool -TargetLowering::findOptimalMemOpLowering(std::vector &MemOps, - unsigned Limit, uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, - bool ZeroMemset, - bool MemcpyStrSrc, - bool AllowOverlap, - unsigned DstAS, unsigned SrcAS, - const AttributeList &FuncAttributes) const { +bool TargetLowering::findOptimalMemOpLowering( + std::vector &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, + unsigned SrcAS, const AttributeList &FuncAttributes) const { // If 'SrcAlign' is zero, that means the memory operation does not need to // load the value, i.e. memset or memcpy from constant string. Otherwise, // it's the inferred alignment of the source. 
'DstAlign', on the other hand, @@ -193,20 +186,18 @@ // means it's possible to change the alignment of the destination. // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. - if (!(SrcAlign == 0 || SrcAlign >= DstAlign)) + if (!(Op.SrcAlign == 0 || Op.SrcAlign >= Op.DstAlign)) return false; - EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsMemset, ZeroMemset, MemcpyStrSrc, - FuncAttributes); + EVT VT = getOptimalMemOpType(Op, FuncAttributes); if (VT == MVT::Other) { // Use the largest integer type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or // equal to DstAlign (or zero). VT = MVT::i64; - while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && - !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + while (Op.DstAlign && Op.DstAlign < VT.getSizeInBits() / 8 && + !allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign)) VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); assert(VT.isInteger()); @@ -223,6 +214,7 @@ } unsigned NumMemOps = 0; + auto Size = Op.Size; while (Size != 0) { unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { @@ -257,8 +249,8 @@ // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. 
bool Fast; - if (NumMemOps && AllowOverlap && NewVTSize < Size && - allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, + if (NumMemOps && Op.AllowOverlap && NewVTSize < Size && + allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign, MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -428,13 +428,11 @@ bool shouldConsiderGEPOffsetSplit() const override; - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; - LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const override; + LLT getOptimalMemOpLLT(const MemOp &Op, + const AttributeList &FuncAttributes) const override; /// Return true if the addressing mode represented by AM is legal for this /// target, for a load/store of the specified type. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9426,9 +9426,7 @@ } EVT AArch64TargetLowering::getOptimalMemOpType( - uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const { + const MemOp &Op, const AttributeList &FuncAttributes) const { bool CanImplicitFloat = !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat); bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; @@ -9436,9 +9434,9 @@ // Only use AdvSIMD to implement memset of 32-byte and above. 
It would have // taken one instruction to materialize the v2i64 zero and one store (with // restrictive addressing mode). Just do i64 stores. - bool IsSmallMemset = IsMemset && Size < 32; + bool IsSmallMemset = Op.IsMemset && Op.Size < 32; auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) { - if (memOpAlign(SrcAlign, DstAlign, AlignCheck)) + if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck)) return true; bool Fast; return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone, @@ -9446,22 +9444,20 @@ Fast; }; - if (CanUseNEON && IsMemset && !IsSmallMemset && + if (CanUseNEON && Op.IsMemset && !IsSmallMemset && AlignmentIsAcceptable(MVT::v2i64, 16)) return MVT::v2i64; if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16)) return MVT::f128; - if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8)) + if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8)) return MVT::i64; - if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4)) + if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4)) return MVT::i32; return MVT::Other; } LLT AArch64TargetLowering::getOptimalMemOpLLT( - uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const { + const MemOp &Op, const AttributeList &FuncAttributes) const { bool CanImplicitFloat = !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat); bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; @@ -9469,9 +9465,9 @@ // Only use AdvSIMD to implement memset of 32-byte and above. It would have // taken one instruction to materialize the v2i64 zero and one store (with // restrictive addressing mode). Just do i64 stores. 
- bool IsSmallMemset = IsMemset && Size < 32; + bool IsSmallMemset = Op.IsMemset && Op.Size < 32; auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) { - if (memOpAlign(SrcAlign, DstAlign, AlignCheck)) + if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck)) return true; bool Fast; return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone, @@ -9479,14 +9475,14 @@ Fast; }; - if (CanUseNEON && IsMemset && !IsSmallMemset && + if (CanUseNEON && Op.IsMemset && !IsSmallMemset && AlignmentIsAcceptable(MVT::v2i64, 16)) return LLT::vector(2, 64); if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16)) return LLT::scalar(128); - if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8)) + if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8)) return LLT::scalar(64); - if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4)) + if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4)) return LLT::scalar(32); return LLT(); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -257,10 +257,7 @@ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, bool *IsFast = nullptr) const override; - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, - bool MemcpyStrSrc, + EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; bool isMemOpUniform(const SDNode *N) const; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1320,18 +1320,16 @@ } EVT SITargetLowering::getOptimalMemOpType( - uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const { + const MemOp &Op, const AttributeList 
&FuncAttributes) const { // FIXME: Should account for address space here. // The default fallback uses the private pointer size as a guess for a type to // use. Make sure we switch these to 64-bit accesses. - if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global + if (Op.Size >= 16 && Op.DstAlign >= 4) // XXX: Should only do for global return MVT::v4i32; - if (Size >= 8 && DstAlign >= 4) + if (Op.Size >= 8 && Op.DstAlign >= 4) return MVT::v2i32; // Use the default. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -347,10 +347,7 @@ MachineMemOperand::Flags Flags, bool *Fast) const override; - EVT getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, - bool MemcpyStrSrc, + EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; bool isTruncateFree(Type *SrcTy, Type *DstTy) const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14961,21 +14961,19 @@ } EVT ARMTargetLowering::getOptimalMemOpType( - uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const { + const MemOp &Op, const AttributeList &FuncAttributes) const { // See if we can use NEON instructions for this... 
- if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && + if ((!Op.IsMemset || Op.ZeroMemset) && Subtarget->hasNEON() && !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; - if (Size >= 16 && - (memOpAlign(SrcAlign, DstAlign, 16) || + if (Op.Size >= 16 && + (memOpAlign(Op.SrcAlign, Op.DstAlign, 16) || (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, MachineMemOperand::MONone, &Fast) && Fast))) { return MVT::v2f64; - } else if (Size >= 8 && - (memOpAlign(SrcAlign, DstAlign, 8) || + } else if (Op.Size >= 8 && + (memOpAlign(Op.SrcAlign, Op.DstAlign, 8) || (allowsMisalignedMemoryAccesses( MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) && Fast))) { diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -576,8 +576,9 @@ // loaded and stored. That's why we multiply the number of elements by 2 to // get the cost for this memcpy. if (getTLI()->findOptimalMemOpLowering( - MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/, - false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/, + MemOps, Limit, + MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign, + /*IsVolatile*/ true), MI->getDestAddressSpace(), MI->getSourceAddressSpace(), F->getAttributes())) return MemOps.size() * 2; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -99,10 +99,9 @@ const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override { - return Size >= 8 ? MVT::i64 : MVT::i32; + return Op.Size >= 8 ? 
MVT::i64 : MVT::i32; } bool shouldConvertConstantLoadToIntImm(const APInt &Imm, diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -302,9 +302,8 @@ /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const override; + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3379,19 +3379,21 @@ /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it /// does not need to be loaded. It returns EVT::Other if the type should be /// determined using generic target-independent logic. 
-EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, - bool MemcpyStrSrc, const AttributeList &FuncAttributes) const { +EVT HexagonTargetLowering::getOptimalMemOpType( + const MemOp &Op, const AttributeList &FuncAttributes) const { auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool { return (GivenA % MinA) == 0; }; - if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8))) + if (Op.Size >= 8 && Aligned(Op.DstAlign, 8) && + (Op.IsMemset || Aligned(Op.SrcAlign, 8))) return MVT::i64; - if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4))) + if (Op.Size >= 4 && Aligned(Op.DstAlign, 4) && + (Op.IsMemset || Aligned(Op.SrcAlign, 4))) return MVT::i32; - if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2))) + if (Op.Size >= 2 && Aligned(Op.DstAlign, 2) && + (Op.IsMemset || Aligned(Op.SrcAlign, 2))) return MVT::i16; return MVT::Other; diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -669,10 +669,7 @@ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, - bool MemcpyStrSrc, + EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; /// isFPImmLegal - Returns true if the target can instruction select the diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -4269,9 +4269,7 @@ } EVT MipsTargetLowering::getOptimalMemOpType( - uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const { + 
const MemOp &Op, const AttributeList &FuncAttributes) const { if (Subtarget.hasMips64()) return MVT::i64; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -892,21 +892,10 @@ MachineFunction &MF, unsigned Intrinsic) const override; - /// getOptimalMemOpType - Returns the target specific optimal type for load - /// and store operations as a result of memset, memcpy, and memmove - /// lowering. If DstAlign is zero that means it's safe to destination - /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it - /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If 'IsMemset' is - /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that - /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy - /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. - EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const override; + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15069,35 +15069,27 @@ return false; } -/// getOptimalMemOpType - Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove -/// lowering. 
If DstAlign is zero that means it's safe to destination -/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it -/// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If 'IsMemset' is -/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that -/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy -/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType( - uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const { + const MemOp &Op, const AttributeList &FuncAttributes) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { // When expanding a memset, require at least two QPX instructions to cover // the cost of loading the value to be stored from the constant pool. - if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && - (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && + if (Subtarget.hasQPX() && Op.Size >= 32 && + (!Op.IsMemset || Op.Size >= 64) && + (!Op.SrcAlign || Op.SrcAlign >= 32) && + (!Op.DstAlign || Op.DstAlign >= 32) && !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { return MVT::v4f64; } // We should use Altivec/VSX loads and stores when available. For unaligned // addresses, unaligned VSX loads are only fast starting with the P8. 
- if (Subtarget.hasAltivec() && Size >= 16 && - (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || - ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) + if (Subtarget.hasAltivec() && Op.Size >= 16 && + (((!Op.SrcAlign || Op.SrcAlign >= 16) && + (!Op.DstAlign || Op.DstAlign >= 16)) || + ((Op.IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) return MVT::v4i32; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -758,19 +758,7 @@ unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override; - /// Returns the target specific optimal type for load - /// and store operations as a result of memset, memcpy, and memmove - /// lowering. If DstAlign is zero that means it's safe to destination - /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it - /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If 'IsMemset' is - /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that - /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy - /// source is constant so it does not need to be loaded. - /// It returns EVT::Other if the type should be determined using generic - /// target-independent logic. 
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; /// Returns true if it's safe to use load / store of the diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2245,34 +2245,23 @@ return Align; } -/// Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove -/// lowering. If DstAlign is zero that means it's safe to destination -/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it -/// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If 'IsMemset' is -/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that -/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy -/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. /// For vector ops we check that the overall size isn't larger than our /// preferred vector width. 
EVT X86TargetLowering::getOptimalMemOpType( - uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const { + const MemOp &Op, const AttributeList &FuncAttributes) const { if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { - if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || - ((DstAlign == 0 || DstAlign >= 16) && - (SrcAlign == 0 || SrcAlign >= 16)))) { + if (Op.Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || + ((Op.DstAlign == 0 || Op.DstAlign >= 16) && + (Op.SrcAlign == 0 || Op.SrcAlign >= 16)))) { // FIXME: Check if unaligned 64-byte accesses are slow. - if (Size >= 64 && Subtarget.hasAVX512() && + if (Op.Size >= 64 && Subtarget.hasAVX512() && (Subtarget.getPreferVectorWidth() >= 512)) { return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; } // FIXME: Check if unaligned 32-byte accesses are slow. - if (Size >= 32 && Subtarget.hasAVX() && + if (Op.Size >= 32 && Subtarget.hasAVX() && (Subtarget.getPreferVectorWidth() >= 256)) { // Although this isn't a well-supported type for AVX1, we'll let // legalization and shuffle lowering produce the optimal codegen. If we @@ -2288,8 +2277,8 @@ if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) && (Subtarget.getPreferVectorWidth() >= 128)) return MVT::v4f32; - } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 && - !Subtarget.is64Bit() && Subtarget.hasSSE2()) { + } else if ((!Op.IsMemset || Op.ZeroMemset) && !Op.MemcpyStrSrc && + Op.Size >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) { // Do not use f64 to lower memcpy if source is string constant. It's // better to use i32 to avoid the loads. // Also, do not use f64 to lower memset unless this is a memset of zeros. @@ -2302,7 +2291,7 @@ // This is a compromise. If we reach here, unaligned accesses may be slow on // this target. 
However, creating smaller, aligned accesses could be even // slower and would certainly be a lot more code. - if (Subtarget.is64Bit() && Size >= 8) + if (Subtarget.is64Bit() && Op.Size >= 8) return MVT::i64; return MVT::i32; } diff --git a/llvm/test/CodeGen/AArch64/memcpy-f128.ll b/llvm/test/CodeGen/AArch64/memcpy-f128.ll --- a/llvm/test/CodeGen/AArch64/memcpy-f128.ll +++ b/llvm/test/CodeGen/AArch64/memcpy-f128.ll @@ -7,9 +7,6 @@ define void @test1() { ; CHECK-LABEL: @test1 -; CHECK: adrp -; CHECK: ldr q0 -; CHECK: str q0 ; CHECK: ret entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 undef, i8* align 8 bitcast (%structA* @stubA to i8*), i64 48, i1 false)