diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -154,15 +154,35 @@
   }

   uint64_t size() const { return Size; }
-  uint64_t getDstAlign() const {
-    return DstAlignCanChange ? 0 : DstAlign.value();
+  Align getDstAlign() const {
+    assert(!DstAlignCanChange);
+    return DstAlign;
   }
+  bool isFixedDstAlign() const { return !DstAlignCanChange; }
   bool allowOverlap() const { return AllowOverlap; }
   bool isMemset() const { return IsMemset; }
   bool isMemcpy() const { return !IsMemset; }
-  bool isZeroMemset() const { return ZeroMemset; }
-  bool isMemcpyStrSrc() const { return MemcpyStrSrc; }
-  uint64_t getSrcAlign() const { return isMemset() ? 0 : SrcAlign.value(); }
+  bool isMemcpyWithFixedDstAlign() const {
+    return isMemcpy() && !DstAlignCanChange;
+  }
+  bool isZeroMemset() const { return isMemset() && ZeroMemset; }
+  bool isMemcpyStrSrc() const {
+    assert(isMemcpy() && "Must be a memcpy");
+    return MemcpyStrSrc;
+  }
+  Align getSrcAlign() const {
+    assert(isMemcpy() && "Must be a memcpy");
+    return SrcAlign;
+  }
+  bool isSrcAligned(Align AlignCheck) const {
+    return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
+  }
+  bool isDstAligned(Align AlignCheck) const {
+    return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
+  }
+  bool isAligned(Align AlignCheck) const {
+    return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
+  }
 };

 /// This base class for TargetLowering contains the SelectionDAG-independent
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -860,7 +860,7 @@
                                           unsigned DstAS, unsigned SrcAS,
                                           const AttributeList &FuncAttributes,
                                           const TargetLowering &TLI) {
-  if (Op.getSrcAlign() != 0 && Op.getSrcAlign() < Op.getDstAlign())
+  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
     return false;

   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
@@ -870,16 +870,18 @@
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     Ty = LLT::scalar(64);
-    while (Op.getDstAlign() && Op.getDstAlign() < Ty.getSizeInBytes() &&
-           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
-      Ty = LLT::scalar(Ty.getSizeInBytes());
+    if (Op.isFixedDstAlign())
+      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS,
+                                                 Op.getDstAlign().value()))
+        Ty = LLT::scalar(Ty.getSizeInBytes());
     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
     // FIXME: check for the largest legal type we can load/store to.
   }

   unsigned NumMemOps = 0;
-  auto Size = Op.size();
-  while (Size != 0) {
+  uint64_t Size = Op.size();
+  while (Size) {
     unsigned TySize = Ty.getSizeInBytes();
     while (TySize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
@@ -899,7 +901,8 @@
     MVT VT = getMVTForLLT(Ty);
     if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
         TLI.allowsMisalignedMemoryAccesses(
-            VT, DstAS, Op.getDstAlign(), MachineMemOperand::MONone, &Fast) &&
+            VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+            MachineMemOperand::MONone, &Fast) &&
         Fast)
       TySize = Size;
     else {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -179,14 +179,7 @@
 bool TargetLowering::findOptimalMemOpLowering(
     std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
     unsigned SrcAS, const AttributeList &FuncAttributes) const {
-  // If 'SrcAlign' is zero, that means the memory operation does not need to
-  // load the value, i.e. memset or memcpy from constant string. Otherwise,
-  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
-  // is the specified alignment of the memory operation. If it is zero, that
-  // means it's possible to change the alignment of the destination.
-  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
-  // not need to be loaded.
-  if (!(Op.getSrcAlign() == 0 || Op.getSrcAlign() >= Op.getDstAlign()))
+  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
     return false;

   EVT VT = getOptimalMemOpType(Op, FuncAttributes);
@@ -196,9 +189,11 @@
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     VT = MVT::i64;
-    while (Op.getDstAlign() && Op.getDstAlign() < VT.getSizeInBits() / 8 &&
-           !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
-      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+    if (Op.isFixedDstAlign())
+      while (
+          Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
+          !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
+        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
     assert(VT.isInteger());

     // Find the largest legal integer type.
@@ -214,8 +209,8 @@
   }

   unsigned NumMemOps = 0;
-  auto Size = Op.size();
-  while (Size != 0) {
+  uint64_t Size = Op.size();
+  while (Size) {
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
@@ -250,8 +245,9 @@
       // issuing a (or a pair of) unaligned and overlapping load / store.
       bool Fast;
       if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
-          allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign(),
-                                         MachineMemOperand::MONone, &Fast) &&
+          allowsMisalignedMemoryAccesses(
+              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+              MachineMemOperand::MONone, &Fast) &&
           Fast)
         VTSize = Size;
       else {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9413,11 +9413,6 @@
   return true;
 }

-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
-                       unsigned AlignCheck) {
-  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
-          (DstAlign == 0 || DstAlign % AlignCheck == 0));
-}

 EVT AArch64TargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
@@ -9429,8 +9424,8 @@
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
   bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
-  auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
+  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
+    if (Op.isAligned(AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9439,13 +9434,13 @@
   };

   if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
-      AlignmentIsAcceptable(MVT::v2i64, 16))
+      AlignmentIsAcceptable(MVT::v2i64, Align(16)))
     return MVT::v2i64;
-  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
     return MVT::f128;
-  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
     return MVT::i64;
-  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
     return MVT::i32;
   return MVT::Other;
 }
@@ -9460,8 +9455,8 @@
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
   bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
-  auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
+  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
+    if (Op.isAligned(AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9470,13 +9465,13 @@
   };

   if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
-      AlignmentIsAcceptable(MVT::v2i64, 16))
+      AlignmentIsAcceptable(MVT::v2i64, Align(16)))
     return LLT::vector(2, 64);
-  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
     return LLT::scalar(128);
-  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
     return LLT::scalar(64);
-  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
     return LLT::scalar(32);
   return LLT();
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1299,10 +1299,10 @@
   // use. Make sure we switch these to 64-bit accesses.

   if (Op.size() >= 16 &&
-      Op.getDstAlign() >= 4) // XXX: Should only do for global
+      Op.isDstAligned(Align(4))) // XXX: Should only do for global
     return MVT::v4i32;

-  if (Op.size() >= 8 && Op.getDstAlign() >= 4)
+  if (Op.size() >= 8 && Op.isDstAligned(Align(4)))
     return MVT::v2i32;

   // Use the default.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15018,26 +15018,21 @@
   return false;
 }

-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
-                       unsigned AlignCheck) {
-  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
-          (DstAlign == 0 || DstAlign % AlignCheck == 0));
-}

 EVT ARMTargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   // See if we can use NEON instructions for this...
-  if ((!Op.isMemset() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
+  if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     bool Fast;
     if (Op.size() >= 16 &&
-        (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 16) ||
+        (Op.isAligned(Align(16)) ||
         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast))) {
       return MVT::v2f64;
     } else if (Op.size() >= 8 &&
-               (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 8) ||
+               (Op.isAligned(Align(8)) ||
                (allowsMisalignedMemoryAccesses(
                     MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
                 Fast))) {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3380,21 +3380,12 @@
 /// determined using generic target-independent logic.
 EVT HexagonTargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
-
-  auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
-    return (GivenA % MinA) == 0;
-  };
-
-  if (Op.size() >= 8 && Aligned(Op.getDstAlign(), 8) &&
-      (Op.isMemset() || Aligned(Op.getSrcAlign(), 8)))
+  if (Op.size() >= 8 && Op.isAligned(Align(8)))
     return MVT::i64;
-  if (Op.size() >= 4 && Aligned(Op.getDstAlign(), 4) &&
-      (Op.isMemset() || Aligned(Op.getSrcAlign(), 4)))
+  if (Op.size() >= 4 && Op.isAligned(Align(4)))
     return MVT::i32;
-  if (Op.size() >= 2 && Aligned(Op.getDstAlign(), 2) &&
-      (Op.isMemset() || Aligned(Op.getSrcAlign(), 2)))
+  if (Op.size() >= 2 && Op.isAligned(Align(2)))
     return MVT::i16;
-
   return MVT::Other;
 }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15120,9 +15120,7 @@
   // When expanding a memset, require at least two QPX instructions to cover
   // the cost of loading the value to be stored from the constant pool.
   if (Subtarget.hasQPX() && Op.size() >= 32 &&
-      (!Op.isMemset() || Op.size() >= 64) &&
-      (!Op.getSrcAlign() || Op.getSrcAlign() >= 32) &&
-      (!Op.getDstAlign() || Op.getDstAlign() >= 32) &&
+      (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     return MVT::v4f64;
   }
@@ -15130,8 +15128,7 @@
   // We should use Altivec/VSX loads and stores when available. For unaligned
   // addresses, unaligned VSX loads are only fast starting with the P8.
   if (Subtarget.hasAltivec() && Op.size() >= 16 &&
-      (((!Op.getSrcAlign() || Op.getSrcAlign() >= 16) &&
-        (!Op.getDstAlign() || Op.getDstAlign() >= 16)) ||
+      (Op.isAligned(Align(16)) ||
        ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
     return MVT::v4i32;
 }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2264,9 +2264,7 @@
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     if (Op.size() >= 16 &&
-        (!Subtarget.isUnalignedMem16Slow() ||
-         ((Op.getDstAlign() == 0 || Op.getDstAlign() >= 16) &&
-          (Op.getSrcAlign() == 0 || Op.getSrcAlign() >= 16)))) {
+        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // FIXME: Check if unaligned 64-byte accesses are slow.
      if (Op.size() >= 64 && Subtarget.hasAVX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
@@ -2289,7 +2287,7 @@
     if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
         (Subtarget.getPreferVectorWidth() >= 128))
       return MVT::v4f32;
-  } else if ((!Op.isMemset() || Op.isZeroMemset()) && !Op.isMemcpyStrSrc() &&
+  } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
              Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
     // Do not use f64 to lower memcpy if source is string constant. It's
     // better to use i32 to avoid the loads.
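What the patch converges on across all of these targets is a single Align-based query surface on MemOp: isFixedDstAlign()/getDstAlign() replace the old "zero means the destination alignment can change" convention, and isAligned/isSrcAligned/isDstAligned fold the per-target memOpAlign and Aligned helpers into the struct itself. The standalone sketch below is only a simplified model of those semantics as they read in the diff above; the sketch::Align and sketch::MemOp types and the main() driver are illustrative stand-ins, not the real llvm::Align and llvm::MemOp declared in TargetLowering.h.

#include <cassert>
#include <cstdint>
#include <iostream>

namespace sketch {

// Stand-in for llvm::Align: a (power-of-two) alignment in bytes.
struct Align {
  uint64_t Value;
  explicit Align(uint64_t V) : Value(V) {}
};

// Analogue of llvm::isAligned: is Alignment a multiple of Check?
inline bool isAligned(Align Check, uint64_t Alignment) {
  return Alignment % Check.Value == 0;
}

// Simplified mirror of the MemOp queries added by the patch.
struct MemOp {
  uint64_t Size = 0;
  Align DstAlign = Align(1);
  Align SrcAlign = Align(1);
  bool DstAlignCanChange = false; // destination alignment may still be raised
  bool IsMemset = false;

  bool isMemset() const { return IsMemset; }
  bool isMemcpy() const { return !IsMemset; }
  bool isFixedDstAlign() const { return !DstAlignCanChange; }

  Align getDstAlign() const {
    assert(isFixedDstAlign() && "DstAlign is not fixed");
    return DstAlign;
  }
  Align getSrcAlign() const {
    assert(isMemcpy() && "Must be a memcpy");
    return SrcAlign;
  }

  // A memset has no source to load, so only the destination constrains it.
  bool isSrcAligned(Align Check) const {
    return isMemset() || sketch::isAligned(Check, SrcAlign.Value);
  }
  // A destination whose alignment can change can always be raised to Check.
  bool isDstAligned(Align Check) const {
    return DstAlignCanChange || sketch::isAligned(Check, DstAlign.Value);
  }
  bool isAligned(Align Check) const {
    return isSrcAligned(Check) && isDstAligned(Check);
  }
};

} // namespace sketch

// How a target-style getOptimalMemOpType check reads after the change: one
// isAligned(Align(N)) query instead of separate "SrcAlign == 0 || SrcAlign % N"
// and "DstAlign == 0 || DstAlign % N" tests.
int main() {
  using sketch::Align;
  sketch::MemOp Copy; // a 32-byte memcpy with a fixed destination alignment
  Copy.Size = 32;
  Copy.DstAlign = Align(16);
  Copy.SrcAlign = Align(8);

  if (Copy.Size >= 16 && Copy.isAligned(Align(16)))
    std::cout << "16-byte accesses\n";
  else if (Copy.Size >= 8 && Copy.isAligned(Align(8)))
    std::cout << "8-byte accesses\n"; // taken: the source is only 8-byte aligned
  return 0;
}

Moving these checks onto MemOp is also what makes the new asserts in getSrcAlign()/getDstAlign() possible: a memset never consults the source alignment and a changeable destination is never compared against a fixed value, which the old zero-sentinel encoding only implied.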