diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -108,10 +108,13 @@
 
 // MemOp models a memory operation, either memset or memcpy/memmove.
 struct MemOp {
+private:
   // Shared
   uint64_t Size;
-  uint64_t DstAlign; // Specified alignment of the memory operation or zero if
-                     // destination alignment can satisfy any constraint.
+  bool DstAlignCanChange; // true if destination alignment can satisfy any
+                          // constraint.
+  Align DstAlign;         // Specified alignment of the memory operation.
+
   bool AllowOverlap;
   // memset only
   bool IsMemset;   // If setthis memory operation is a memset.
@@ -119,34 +122,47 @@
   // memcpy only
   bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
                      // constant so it does not need to be loaded.
-  uint64_t SrcAlign; // Inferred alignment of the source or zero if the memory
-                     // operation does not need to load the value.
-
+  Align SrcAlign; // Inferred alignment of the source or default value if the
+                  // memory operation does not need to load the value.
+public:
   static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                     Align SrcAlign, bool IsVolatile,
                     bool MemcpyStrSrc = false) {
-    return {
-        /*.Size =*/Size,
-        /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign.value(),
-        /*.AllowOverlap =*/!IsVolatile,
-        /*.IsMemset =*/false,
-        /*.ZeroMemset =*/false,
-        /*.MemcpyStrSrc =*/MemcpyStrSrc,
-        /*.SrcAlign =*/SrcAlign.value(),
-    };
+    MemOp Op;
+    Op.Size = Size;
+    Op.DstAlignCanChange = DstAlignCanChange;
+    Op.DstAlign = DstAlign;
+    Op.AllowOverlap = !IsVolatile;
+    Op.IsMemset = false;
+    Op.ZeroMemset = false;
+    Op.MemcpyStrSrc = MemcpyStrSrc;
+    Op.SrcAlign = SrcAlign;
+    return Op;
   }
+
   static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                    bool IsZeroMemset, bool IsVolatile) {
-    return {
-        /*.Size =*/Size,
-        /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign.value(),
-        /*.AllowOverlap =*/!IsVolatile,
-        /*.IsMemset =*/true,
-        /*.ZeroMemset =*/IsZeroMemset,
-        /*.MemcpyStrSrc =*/false,
-        /*.SrcAlign =*/0,
-    };
-  }
+    MemOp Op;
+    Op.Size = Size;
+    Op.DstAlignCanChange = DstAlignCanChange;
+    Op.DstAlign = DstAlign;
+    Op.AllowOverlap = !IsVolatile;
+    Op.IsMemset = true;
+    Op.ZeroMemset = IsZeroMemset;
+    Op.MemcpyStrSrc = false;
+    return Op;
+  }
+
+  uint64_t size() const { return Size; }
+  uint64_t getDstAlign() const {
+    return DstAlignCanChange ? 0 : DstAlign.value();
+  }
+  bool allowOverlap() const { return AllowOverlap; }
+  bool isMemset() const { return IsMemset; }
+  bool isMemcpy() const { return !IsMemset; }
+  bool isZeroMemset() const { return ZeroMemset; }
+  bool isMemcpyStrSrc() const { return MemcpyStrSrc; }
+  uint64_t getSrcAlign() const { return isMemset() ? 0 : SrcAlign.value(); }
 };
 
 /// This base class for TargetLowering contains the SelectionDAG-independent
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -860,7 +860,7 @@
                                           unsigned DstAS, unsigned SrcAS,
                                           const AttributeList &FuncAttributes,
                                           const TargetLowering &TLI) {
-  if (Op.SrcAlign != 0 && Op.SrcAlign < Op.DstAlign)
+  if (Op.getSrcAlign() != 0 && Op.getSrcAlign() < Op.getDstAlign())
     return false;
 
   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
@@ -870,15 +870,15 @@
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     Ty = LLT::scalar(64);
-    while (Op.DstAlign && Op.DstAlign < Ty.getSizeInBytes() &&
-           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.DstAlign))
+    while (Op.getDstAlign() && Op.getDstAlign() < Ty.getSizeInBytes() &&
+           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
       Ty = LLT::scalar(Ty.getSizeInBytes());
     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
     // FIXME: check for the largest legal type we can load/store to.
   }
 
   unsigned NumMemOps = 0;
-  auto Size = Op.Size;
+  auto Size = Op.size();
   while (Size != 0) {
     unsigned TySize = Ty.getSizeInBytes();
     while (TySize > Size) {
@@ -897,9 +897,9 @@
       bool Fast;
       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
       MVT VT = getMVTForLLT(Ty);
-      if (NumMemOps && Op.AllowOverlap && NewTySize < Size &&
+      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
           TLI.allowsMisalignedMemoryAccesses(
-              VT, DstAS, Op.DstAlign, MachineMemOperand::MONone, &Fast) &&
+              VT, DstAS, Op.getDstAlign(), MachineMemOperand::MONone, &Fast) &&
           Fast)
         TySize = Size;
       else {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -186,7 +186,7 @@
   // means it's possible to change the alignment of the destination.
   // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
   // not need to be loaded.
-  if (!(Op.SrcAlign == 0 || Op.SrcAlign >= Op.DstAlign))
+  if (!(Op.getSrcAlign() == 0 || Op.getSrcAlign() >= Op.getDstAlign()))
     return false;
 
   EVT VT = getOptimalMemOpType(Op, FuncAttributes);
@@ -196,8 +196,8 @@
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     VT = MVT::i64;
-    while (Op.DstAlign && Op.DstAlign < VT.getSizeInBits() / 8 &&
-           !allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign))
+    while (Op.getDstAlign() && Op.getDstAlign() < VT.getSizeInBits() / 8 &&
+           !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
       VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
     assert(VT.isInteger());
 
@@ -214,7 +214,7 @@
   }
 
   unsigned NumMemOps = 0;
-  auto Size = Op.Size;
+  auto Size = Op.size();
   while (Size != 0) {
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
@@ -249,8 +249,8 @@
       // If the new VT cannot cover all of the remaining bits, then consider
       // issuing a (or a pair of) unaligned and overlapping load / store.
       bool Fast;
-      if (NumMemOps && Op.AllowOverlap && NewVTSize < Size &&
-          allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign,
+      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
+          allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign(),
                                          MachineMemOperand::MONone, &Fast) &&
           Fast)
         VTSize = Size;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9436,9 +9436,9 @@
   // Only use AdvSIMD to implement memset of 32-byte and above. It would have
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
-  bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
+  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
   auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
+    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9446,14 +9446,14 @@
            Fast;
   };
 
-  if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
+  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
       AlignmentIsAcceptable(MVT::v2i64, 16))
     return MVT::v2i64;
   if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
     return MVT::f128;
-  if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
     return MVT::i64;
-  if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
     return MVT::i32;
   return MVT::Other;
 }
@@ -9467,9 +9467,9 @@
   // Only use AdvSIMD to implement memset of 32-byte and above. It would have
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
-  bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
+  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
   auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
+    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9477,14 +9477,14 @@
            Fast;
   };
 
-  if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
+  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
       AlignmentIsAcceptable(MVT::v2i64, 16))
     return LLT::vector(2, 64);
   if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
     return LLT::scalar(128);
-  if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
     return LLT::scalar(64);
-  if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
     return LLT::scalar(32);
   return LLT();
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1326,10 +1326,11 @@
   // The default fallback uses the private pointer size as a guess for a type to
   // use. Make sure we switch these to 64-bit accesses.
 
-  if (Op.Size >= 16 && Op.DstAlign >= 4) // XXX: Should only do for global
+  if (Op.size() >= 16 &&
+      Op.getDstAlign() >= 4) // XXX: Should only do for global
     return MVT::v4i32;
 
-  if (Op.Size >= 8 && Op.DstAlign >= 4)
+  if (Op.size() >= 8 && Op.getDstAlign() >= 4)
     return MVT::v2i32;
 
   // Use the default.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15027,17 +15027,17 @@
 EVT ARMTargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   // See if we can use NEON instructions for this...
-  if ((!Op.IsMemset || Op.ZeroMemset) && Subtarget->hasNEON() &&
+  if ((!Op.isMemset() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     bool Fast;
-    if (Op.Size >= 16 &&
-        (memOpAlign(Op.SrcAlign, Op.DstAlign, 16) ||
+    if (Op.size() >= 16 &&
+        (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 16) ||
         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast))) {
       return MVT::v2f64;
-    } else if (Op.Size >= 8 &&
-               (memOpAlign(Op.SrcAlign, Op.DstAlign, 8) ||
+    } else if (Op.size() >= 8 &&
+               (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 8) ||
                (allowsMisalignedMemoryAccesses(
                     MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
                 Fast))) {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -101,7 +101,7 @@
 
   EVT getOptimalMemOpType(const MemOp &Op,
                           const AttributeList &FuncAttributes) const override {
-    return Op.Size >= 8 ? MVT::i64 : MVT::i32;
+    return Op.size() >= 8 ? MVT::i64 : MVT::i32;
   }
 
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3385,14 +3385,14 @@
     return (GivenA % MinA) == 0;
   };
 
-  if (Op.Size >= 8 && Aligned(Op.DstAlign, 8) &&
-      (Op.IsMemset || Aligned(Op.SrcAlign, 8)))
+  if (Op.size() >= 8 && Aligned(Op.getDstAlign(), 8) &&
+      (Op.isMemset() || Aligned(Op.getSrcAlign(), 8)))
     return MVT::i64;
-  if (Op.Size >= 4 && Aligned(Op.DstAlign, 4) &&
-      (Op.IsMemset || Aligned(Op.SrcAlign, 4)))
+  if (Op.size() >= 4 && Aligned(Op.getDstAlign(), 4) &&
+      (Op.isMemset() || Aligned(Op.getSrcAlign(), 4)))
     return MVT::i32;
-  if (Op.Size >= 2 && Aligned(Op.DstAlign, 2) &&
-      (Op.IsMemset || Aligned(Op.SrcAlign, 2)))
+  if (Op.size() >= 2 && Aligned(Op.getDstAlign(), 2) &&
+      (Op.isMemset() || Aligned(Op.getSrcAlign(), 2)))
     return MVT::i16;
 
   return MVT::Other;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15078,20 +15078,20 @@
   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
     // When expanding a memset, require at least two QPX instructions to cover
     // the cost of loading the value to be stored from the constant pool.
-    if (Subtarget.hasQPX() && Op.Size >= 32 &&
-        (!Op.IsMemset || Op.Size >= 64) &&
-        (!Op.SrcAlign || Op.SrcAlign >= 32) &&
-        (!Op.DstAlign || Op.DstAlign >= 32) &&
+    if (Subtarget.hasQPX() && Op.size() >= 32 &&
+        (!Op.isMemset() || Op.size() >= 64) &&
+        (!Op.getSrcAlign() || Op.getSrcAlign() >= 32) &&
+        (!Op.getDstAlign() || Op.getDstAlign() >= 32) &&
         !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
       return MVT::v4f64;
     }
 
     // We should use Altivec/VSX loads and stores when available. For unaligned
     // addresses, unaligned VSX loads are only fast starting with the P8.
-    if (Subtarget.hasAltivec() && Op.Size >= 16 &&
-        (((!Op.SrcAlign || Op.SrcAlign >= 16) &&
-          (!Op.DstAlign || Op.DstAlign >= 16)) ||
-         ((Op.IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+    if (Subtarget.hasAltivec() && Op.size() >= 16 &&
+        (((!Op.getSrcAlign() || Op.getSrcAlign() >= 16) &&
+          (!Op.getDstAlign() || Op.getDstAlign() >= 16)) ||
+         ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
       return MVT::v4i32;
   }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2252,16 +2252,17 @@
 EVT X86TargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
-    if (Op.Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
-                          ((Op.DstAlign == 0 || Op.DstAlign >= 16) &&
-                           (Op.SrcAlign == 0 || Op.SrcAlign >= 16)))) {
+    if (Op.size() >= 16 &&
+        (!Subtarget.isUnalignedMem16Slow() ||
+         ((Op.getDstAlign() == 0 || Op.getDstAlign() >= 16) &&
+          (Op.getSrcAlign() == 0 || Op.getSrcAlign() >= 16)))) {
       // FIXME: Check if unaligned 64-byte accesses are slow.
-      if (Op.Size >= 64 && Subtarget.hasAVX512() &&
+      if (Op.size() >= 64 && Subtarget.hasAVX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // FIXME: Check if unaligned 32-byte accesses are slow.
-      if (Op.Size >= 32 && Subtarget.hasAVX() &&
+      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          (Subtarget.getPreferVectorWidth() >= 256)) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
@@ -2277,8 +2278,8 @@
     if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
         (Subtarget.getPreferVectorWidth() >= 128))
       return MVT::v4f32;
-  } else if ((!Op.IsMemset || Op.ZeroMemset) && !Op.MemcpyStrSrc &&
-             Op.Size >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
+  } else if ((!Op.isMemset() || Op.isZeroMemset()) && !Op.isMemcpyStrSrc() &&
+             Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
     // Do not use f64 to lower memcpy if source is string constant. It's
     // better to use i32 to avoid the loads.
     // Also, do not use f64 to lower memset unless this is a memset of zeros.
@@ -2291,7 +2292,7 @@
   // This is a compromise. If we reach here, unaligned accesses may be slow on
   // this target. However, creating smaller, aligned accesses could be even
   // slower and would certainly be a lot more code.
-  if (Subtarget.is64Bit() && Op.Size >= 8)
+  if (Subtarget.is64Bit() && Op.size() >= 8)
     return MVT::i64;
   return MVT::i32;
 }
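
Note for reviewers (not part of the patch): a minimal sketch of how a caller would build and query a MemOp through the new factories and accessors. The wrapper function and the concrete sizes/alignments below are hypothetical; only MemOp::Copy, MemOp::Set, and the accessors shown in the TargetLowering.h hunk come from this change.

// Hypothetical illustration of the new MemOp API; compiles against an LLVM
// tree that contains this patch.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Alignment.h"

void describeMemOps() {
  // A 64-byte non-volatile memcpy with a fixed 16-byte destination alignment
  // and an inferred 8-byte source alignment.
  llvm::MemOp Copy = llvm::MemOp::Copy(/*Size=*/64, /*DstAlignCanChange=*/false,
                                       /*DstAlign=*/llvm::Align(16),
                                       /*SrcAlign=*/llvm::Align(8),
                                       /*IsVolatile=*/false);
  (void)Copy.size();         // 64
  (void)Copy.getDstAlign();  // 16; would be 0 if DstAlignCanChange were true
  (void)Copy.getSrcAlign();  // 8; only meaningful for memcpy/memmove
  (void)Copy.allowOverlap(); // true, because the operation is not volatile

  // A 32-byte zero memset whose destination alignment may still be raised.
  llvm::MemOp Set = llvm::MemOp::Set(/*Size=*/32, /*DstAlignCanChange=*/true,
                                     /*DstAlign=*/llvm::Align(4),
                                     /*IsZeroMemset=*/true,
                                     /*IsVolatile=*/false);
  (void)Set.isMemset();     // true
  (void)Set.isZeroMemset(); // true
  (void)Set.getDstAlign();  // 0, because DstAlignCanChange is set
  (void)Set.getSrcAlign();  // 0, a memset does not load a source value
}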