diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1060,7 +1060,7 @@ SDValue getStackArgumentTokenFactor(SDValue Chain); SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVol, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, @@ -1068,8 +1068,8 @@ AAResults *AA = nullptr); SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVol, bool isTailCall, - MachinePointerInfo DstPtrInfo, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo = AAMDNodes(), AAResults *AA = nullptr); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h --- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -51,8 +51,8 @@ virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, - Align Alignment, bool isVolatile, - bool AlwaysInline, + Align DstAlign, Align SrcAlign, + bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { return SDValue(); @@ -66,7 +66,7 @@ /// lowering strategy should be used. 
virtual SDValue EmitTargetCodeForMemmove( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, - SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, + SDValue Op2, SDValue Op3, Align DstAlign, Align SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6833,13 +6833,11 @@ } } -static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, - SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, Align Alignment, - bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo, AAResults *AA) { +static SDValue getMemcpyLoadsAndStores( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, Align DstAlign, Align SrcAlign, bool isVol, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, AAResults *AA) { // Turn a memcpy of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -6860,20 +6858,18 @@ FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); - if (!SrcAlign || Alignment > *SrcAlign) - SrcAlign = Alignment; - assert(SrcAlign && "SrcAlign must be set"); + if (MaybeAlign InferedSrcAlign = DAG.InferPtrAlign(Src)) + SrcAlign = std::max(SrcAlign, *InferedSrcAlign); ConstantDataArraySlice Slice; // If marked as volatile, perform a copy even when marked as constant. bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? 
~0U : TLI.getMaxStoresPerMemcpy(OptSize); const MemOp Op = isZeroConstant - ? MemOp::Set(Size, DstAlignCanChange, Alignment, + ? MemOp::Set(Size, DstAlignCanChange, DstAlign, /*IsZeroMemset*/ true, isVol) - : MemOp::Copy(Size, DstAlignCanChange, Alignment, - *SrcAlign, isVol, CopyFromConstant); + : MemOp::Copy(Size, DstAlignCanChange, DstAlign, + SrcAlign, isVol, CopyFromConstant); if (!TLI.findOptimalMemOpLowering( MemOps, Limit, Op, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) @@ -6887,14 +6883,14 @@ // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) - while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + while (NewAlign > DstAlign && DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign = NewAlign.previous(); - if (NewAlign > Alignment) { + if (NewAlign > DstAlign) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Alignment = NewAlign; + DstAlign = NewAlign; } } @@ -6949,7 +6945,7 @@ Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); + DstPtrInfo.getWithOffset(DstOff), DstAlign, MMOFlags, NewAAInfo); OutChains.push_back(Store); } } @@ -6975,13 +6971,13 @@ ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); + commonAlignment(SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); + DstPtrInfo.getWithOffset(DstOff), VT, DstAlign, MMOFlags, NewAAInfo); 
OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -7035,13 +7031,11 @@ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } -static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, - SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, Align Alignment, - bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { +static SDValue getMemmoveLoadsAndStores( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, Align DstAlign, Align SrcAlign, bool isVol, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo) { // Turn a memmove of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -7060,14 +7054,12 @@ FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); - if (!SrcAlign || Alignment > *SrcAlign) - SrcAlign = Alignment; - assert(SrcAlign && "SrcAlign must be set"); + if (MaybeAlign InferedSrcAlign = DAG.InferPtrAlign(Src)) + SrcAlign = std::max(SrcAlign, *InferedSrcAlign); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!TLI.findOptimalMemOpLowering( MemOps, Limit, - MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign, + MemOp::Copy(Size, DstAlignCanChange, DstAlign, SrcAlign, /*IsVolatile*/ true), DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) @@ -7076,11 +7068,11 @@ if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); Align NewAlign = DL.getABITypeAlign(Ty); - if (NewAlign > Alignment) { + if (NewAlign > DstAlign) { // Give the stack frame object a larger alignment if needed. 
if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Alignment = NewAlign; + DstAlign = NewAlign; } } @@ -7109,7 +7101,7 @@ Value = DAG.getLoad( VT, dl, Chain, DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), - SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags, NewAAInfo); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -7124,7 +7116,7 @@ Store = DAG.getStore( Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); + DstPtrInfo.getWithOffset(DstOff), DstAlign, MMOFlags, NewAAInfo); OutChains.push_back(Store); DstOff += VTSize; } @@ -7254,9 +7246,9 @@ } SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, Align Alignment, - bool isVol, bool AlwaysInline, bool isTailCall, - MachinePointerInfo DstPtrInfo, + SDValue Src, SDValue Size, Align DstAlign, + Align SrcAlign, bool isVol, bool AlwaysInline, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memcpy to loads and stores first. @@ -7268,8 +7260,8 @@ return Chain; SDValue Result = getMemcpyLoadsAndStores( - *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA); + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), DstAlign, + SrcAlign, isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA); if (Result.getNode()) return Result; } @@ -7278,8 +7270,8 @@ // code. If the target chooses to do this, this is the next best. 
if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemcpy( - *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, - DstPtrInfo, SrcPtrInfo); + *this, dl, Chain, Dst, Src, Size, DstAlign, SrcAlign, isVol, + AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -7289,8 +7281,8 @@ if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores( - *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA); + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), DstAlign, + SrcAlign, isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -7368,8 +7360,8 @@ } SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, Align Alignment, - bool isVol, bool isTailCall, + SDValue Src, SDValue Size, Align DstAlign, + Align SrcAlign, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, AAResults *AA) { @@ -7382,8 +7374,8 @@ return Chain; SDValue Result = getMemmoveLoadsAndStores( - *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), DstAlign, + SrcAlign, isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); if (Result.getNode()) return Result; } @@ -7391,9 +7383,9 @@ // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. 
if (TSI) { - SDValue Result = - TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, - Alignment, isVol, DstPtrInfo, SrcPtrInfo); + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, DstAlign, SrcAlign, isVol, DstPtrInfo, + SrcPtrInfo); if (Result.getNode()) return Result; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5903,14 +5903,11 @@ // @llvm.memcpy defines 0 and 1 to both mean no alignment. Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); - Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - // FIXME: Support passing different dest/src alignments to the memcpy DAG - // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); SDValue MC = DAG.getMemcpy( - Root, sdl, Op1, Op2, Op3, Alignment, isVol, + Root, sdl, Op1, Op2, Op3, DstAlign, SrcAlign, isVol, /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); @@ -5925,13 +5922,10 @@ // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment. Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); - Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - // FIXME: Support passing different dest/src alignments to the memcpy DAG - // node. 
SDValue MC = DAG.getMemcpy( - getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + getRoot(), sdl, Dst, Src, Size, DstAlign, SrcAlign, isVol, /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); @@ -5979,16 +5973,13 @@ // @llvm.memmove defines 0 and 1 to both mean no alignment. Align DstAlign = MMI.getDestAlign().valueOrOne(); Align SrcAlign = MMI.getSourceAlign().valueOrOne(); - Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - // FIXME: Support passing different dest/src alignments to the memmove DAG - // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, - isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata(), AA); + SDValue MM = DAG.getMemmove( + Root, sdl, Op1, Op2, Op3, DstAlign, SrcAlign, isVol, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); return; } @@ -8103,8 +8094,6 @@ Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne(); Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne(); - // DAG::getMemcpy needs Alignment to be defined. - Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = false; SDLoc sdl = getCurSDLoc(); @@ -8113,11 +8102,10 @@ // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false, - /*isTailCall=*/false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + Root, sdl, Dst, Src, Size, DstAlign, SrcAlign, isVol, false, + /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata()); assert(MC.getNode() != nullptr && "** memcpy should not be lowered as TailCall in mempcpy context **"); DAG.setRoot(MC); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -205,7 +205,7 @@ if (VT == MVT::Other) { // Use the largest integer type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or - // equal to DstAlign (or zero). + // equal to DstAlign. 
VT = MVT::i64; if (Op.isFixedDstAlign()) while (Op.getDstAlign() < (VT.getSizeInBits() / 8) && diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4546,15 +4546,15 @@ SDValue Val = Op.getOperand(3); Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64); SDValue Size = Op.getOperand(4); - auto Alignment = Node->getMemOperand()->getAlign(); + auto DstAlign = Node->getMemOperand()->getAlign(); bool IsVol = Node->isVolatile(); auto DstPtrInfo = Node->getPointerInfo(); const auto &SDI = static_cast(DAG.getSelectionDAGInfo()); - SDValue MS = - SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val, - Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{}); + SDValue MS = SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, + Dst, Val, Size, DstAlign, Align(1), IsVol, + DstPtrInfo, MachinePointerInfo{}); // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the // intrinsic has 2. So hide SizeWb using MERGE_VALUES. 
Otherwise @@ -7058,7 +7058,8 @@ DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64); SDValue Cpy = DAG.getMemcpy( Chain, DL, DstAddr, Arg, SizeNode, - Outs[i].Flags.getNonZeroByValAlign(), + /*DstAlign*/ Outs[i].Flags.getNonZeroByValAlign(), + /*SrcAlign*/ Outs[i].Flags.getNonZeroByValAlign(), /*isVol = */ false, /*AlwaysInline = */ false, /*isTailCall = */ false, DstInfo, MachinePointerInfo()); @@ -8935,8 +8936,9 @@ return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VaListSize, DL, MVT::i32), - Align(PtrSize), false, false, false, - MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); + /*DstAlign*/ Align(PtrSize), /*SrcAlign*/ Align(PtrSize), + false, false, false, MachinePointerInfo(DestSV), + MachinePointerInfo(SrcSV)); } SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -21,13 +21,14 @@ public: SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, - SDValue SrcOrValue, SDValue Size, Align Alignment, - bool isVolatile, MachinePointerInfo DstPtrInfo, + SDValue SrcOrValue, SDValue Size, Align DstAlign, + Align SrcAlign, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; @@ -39,7 +40,7 @@ SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, - Align Alignment, bool isVolatile, + Align 
DstAlign, Align SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -15,13 +15,11 @@ #define DEBUG_TYPE "aarch64-selectiondag-info" -SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode, - SelectionDAG &DAG, const SDLoc &DL, - SDValue Chain, SDValue Dst, - SDValue SrcOrValue, SDValue Size, - Align Alignment, bool isVolatile, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const { +SDValue AArch64SelectionDAGInfo::EmitMOPS( + AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, SDValue SrcOrValue, SDValue Size, + Align DstAlign, Align SrcAlign, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // Get the constant size of the copy/set. 
uint64_t ConstSize = 0; @@ -54,10 +52,8 @@ MachineFunction &MF = DAG.getMachineFunction(); - auto *DstOp = - MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment); - auto *SrcOp = - MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment); + auto *DstOp = MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, DstAlign); + auto *SrcOp = MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, SrcAlign); if (IsSet) { // Extend value to i64 if required @@ -79,13 +75,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { const AArch64Subtarget &STI = DAG.getMachineFunction().getSubtarget(); if (STI.hasMOPS()) return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size, - Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + DstAlign, SrcAlign, isVolatile, DstPtrInfo, SrcPtrInfo); return SDValue(); } @@ -98,20 +95,21 @@ if (STI.hasMOPS()) { return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size, - Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{}); + Alignment, Align(1), isVolatile, DstPtrInfo, + MachinePointerInfo{}); } return SDValue(); } SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { const AArch64Subtarget &STI = DAG.getMachineFunction().getSubtarget(); if (STI.hasMOPS()) { return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size, - Alignment, isVolatile, 
DstPtrInfo, SrcPtrInfo); + DstAlign, SrcAlign, isVolatile, DstPtrInfo, SrcPtrInfo); } return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3264,8 +3264,10 @@ DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i32); SDValue Cpy = DAG.getMemcpy(Chain, DL, DstAddr, Arg, SizeNode, - Outs[i].Flags.getNonZeroByValAlign(), - /*isVol = */ false, /*AlwaysInline = */ true, + /*DstAlign*/ Outs[i].Flags.getNonZeroByValAlign(), + /*SrcAlign*/ Outs[i].Flags.getNonZeroByValAlign(), + /*isVol = */ false, + /*AlwaysInline = */ true, /*isTailCall = */ false, DstInfo, MachinePointerInfo(AMDGPUAS::PRIVATE_ADDRESS)); diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -39,7 +39,7 @@ public: SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; @@ -47,7 +47,7 @@ SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, - Align Alignment, bool isVolatile, + Align DstAlign, Align SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -168,30 +168,34 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool 
isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { const ARMSubtarget &Subtarget = DAG.getMachineFunction().getSubtarget(); ConstantSDNode *ConstantSize = dyn_cast(Size); + const Align MinAlignment = std::min(DstAlign, SrcAlign); + if (Subtarget.hasMVEIntegerOps() && - shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true)) + shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, MinAlignment, + true)) return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src, DAG.getZExtOrTrunc(Size, dl, MVT::i32)); // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. - if (Alignment < Align(4)) + if (DstAlign < Align(4) || SrcAlign < Align(4)) return SDValue(); // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. 
if (!ConstantSize) return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, - Alignment.value(), RTLIB::MEMCPY); + MinAlignment.value(), RTLIB::MEMCPY); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, - Alignment.value(), RTLIB::MEMCPY); + MinAlignment.value(), RTLIB::MEMCPY); unsigned BytesLeft = SizeVal & 3; unsigned NumMemOps = SizeVal >> 2; @@ -288,10 +292,11 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, - Alignment.value(), RTLIB::MEMMOVE); + std::min(DstAlign, SrcAlign).value(), + RTLIB::MEMMOVE); } SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h @@ -21,7 +21,7 @@ public: SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp @@ -19,15 +19,19 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, - 
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { // Requires the copy size to be a constant. ConstantSDNode *ConstantSize = dyn_cast(Size); if (!ConstantSize) return SDValue(); unsigned CopyLen = ConstantSize->getZExtValue(); + // BPFISD::MEMCPY takes a single alignment operand, so use the alignment + // that both the destination and the source are known to satisfy. + const Align Alignment = std::min(DstAlign, SrcAlign); unsigned StoresNumEstimate = alignTo(CopyLen, Alignment) >> Log2(Alignment); // Impose the same copy length limit as MaxStoresPerMemcpy. if (StoresNumEstimate > getCommonMaxStoresPerMemFunc()) return SDValue(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -176,7 +176,8 @@ SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy( - Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), + Chain, dl, Dst, Src, SizeNode, /*DstAlign*/ Flags.getNonZeroByValAlign(), + /*SrcAlign*/ Flags.getNonZeroByValAlign(), /*isVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -1020,7 +1021,8 @@ // Size of the va_list is 12 bytes as it has 3 pointers. Therefore, // we need to memcopy 12 bytes from va_list to another similar list. 
return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr, - DAG.getIntPtrConstant(12, DL), Align(4), + DAG.getIntPtrConstant(12, DL), /*DstAlign*/ Align(4), + /*SrcAlign*/ Align(4), /*isVolatile*/ false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } diff --git a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h --- a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -23,7 +23,7 @@ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -18,10 +18,12 @@ SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); - if (AlwaysInline || Alignment < Align(4) || !ConstantSize) + if (AlwaysInline || DstAlign < Align(4) || SrcAlign < Align(4) || + !ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp --- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp @@ -644,7 +644,8 @@ SDValue 
FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32); - Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, + /*DstAlign*/ Alignment, /*SrcAlign*/ Alignment, /*IsVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), diff --git a/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h b/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h --- a/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h +++ b/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h @@ -24,7 +24,7 @@ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; diff --git a/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp b/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp --- a/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp @@ -20,8 +20,8 @@ SDValue LanaiSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG & /*DAG*/, const SDLoc & /*dl*/, SDValue /*Chain*/, - SDValue /*Dst*/, SDValue /*Src*/, SDValue Size, Align /*Alignment*/, - bool /*isVolatile*/, bool /*AlwaysInline*/, + SDValue /*Dst*/, SDValue /*Src*/, SDValue Size, Align /*DstAlign*/, + Align /*SrcAlign*/, bool /*isVolatile*/, bool /*AlwaysInline*/, MachinePointerInfo /*DstPtrInfo*/, MachinePointerInfo /*SrcPtrInfo*/) const { ConstantSDNode *ConstantSize = dyn_cast(Size); diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -864,11 +864,13 @@ if (Flags.isByVal()) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i16); - MemOp = 
DAG.getMemcpy( - Chain, dl, PtrOff, Arg, SizeNode, Flags.getNonZeroByValAlign(), - /*isVolatile*/ false, - /*AlwaysInline=*/true, - /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); + MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode, + /*DstAlign*/ Flags.getNonZeroByValAlign(), + /*SrcAlign*/ Flags.getNonZeroByValAlign(), + /*isVolatile*/ false, + /*AlwaysInline=*/true, + /*isTailCall=*/false, MachinePointerInfo(), + MachinePointerInfo()); } else { MemOp = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -4485,7 +4485,8 @@ DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); Chain = DAG.getMemcpy( Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), - Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false, + /*DstAlign*/ Alignment, /*SrcAlign*/ Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3679,9 +3679,9 @@ // We have to copy the entire va_list struct: // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8), - false, true, false, MachinePointerInfo(), - MachinePointerInfo()); + DAG.getConstant(12, SDLoc(Op), MVT::i32), + /*DstAlign*/ Align(8), /*SrcAlign*/ Align(8), false, + true, false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, @@ -5061,8 +5061,9 @@ SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = 
DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, - Flags.getNonZeroByValAlign(), false, false, false, - MachinePointerInfo(), MachinePointerInfo()); + /*DstAlign*/ Flags.getNonZeroByValAlign(), + /*SrcAlign*/ Flags.getNonZeroByValAlign(), false, false, + false, MachinePointerInfo(), MachinePointerInfo()); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11138,7 +11138,8 @@ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); - Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, + /*DstAlign*/ Alignment, /*SrcAlign*/ Alignment, /*IsVolatile=*/false, /*AlwaysInline=*/false, IsTailCall, MachinePointerInfo(), MachinePointerInfo()); diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -788,7 +788,8 @@ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32); - Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Alignment, + Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, + /*DstAlign*/ Alignment, /*SrcAlign*/ Alignment, false, // isVolatile, (Size <= 32), // AlwaysInline if size <= 32, false, // isTailCall diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -3624,7 +3624,8 @@ uint32_t Sz = Subtarget.isTargetXPLINK64() ? 
getTargetMachine().getPointerSize(0) : 32; return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), - Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, + /*DstAlign*/ Align(8), /*SrcAlign*/ Align(8), + /*isVolatile*/ false, /*AlwaysInline*/ false, /*isTailCall*/ false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -23,7 +23,7 @@ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, + SDValue Size, Align DstAlign, Align SrcAlign, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -59,8 +59,9 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, Align DstAlign, Align SrcAlign, bool IsVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { if (IsVolatile) return SDValue(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1066,10 +1066,12 @@ SDValue SizeNode = DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); SDValue FINode = 
DAG.getFrameIndex(FI, getPointerTy(Layout)); - Chain = DAG.getMemcpy( - Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(), - /*isVolatile*/ false, /*AlwaysInline=*/false, - /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); + Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode, + /*DstAlign*/ Out.Flags.getNonZeroByValAlign(), + /*SrcAlign*/ Out.Flags.getNonZeroByValAlign(), + /*isVolatile*/ false, /*AlwaysInline=*/false, + /*isTailCall*/ false, MachinePointerInfo(), + MachinePointerInfo()); OutVal = FINode; } // Count the number of fixed args *after* legalization. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -24,14 +24,14 @@ ~WebAssemblySelectionDAGInfo() override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, Align Alignment, bool isVolatile, - bool AlwaysInline, + SDValue Op3, Align DstAlign, Align SrcAlign, + bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, - Align Alignment, bool isVolatile, + Align DstAlign, Align SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -20,8 +20,9 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, 
const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, Align DstAlign, Align SrcAlign, bool IsVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { auto &ST = DAG.getMachineFunction().getSubtarget(); if (!ST.hasBulkMemory()) return SDValue(); @@ -35,11 +36,11 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, Align Alignment, bool IsVolatile, + SDValue Op3, Align DstAlign, Align SrcAlign, bool IsVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, - Alignment, IsVolatile, false, - DstPtrInfo, SrcPtrInfo); + return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, DstAlign, + SrcAlign, IsVolatile, false, DstPtrInfo, + SrcPtrInfo); } SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3583,7 +3583,8 @@ SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl); return DAG.getMemcpy( - Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), + Chain, dl, Dst, Src, SizeNode, /*DstAlign*/ Flags.getNonZeroByValAlign(), + /*SrcAlign*/ Flags.getNonZeroByValAlign(), /*isVolatile*/ false, /*AlwaysInline=*/true, /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); } @@ -26166,12 +26167,13 @@ const Value *DstSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); + Align Alignment(Subtarget.isTarget64BitLP64() ? 
8 : 4); return DAG.getMemcpy( Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 24 : 16, DL), - Align(Subtarget.isTarget64BitLP64() ? 8 : 4), /*isVolatile*/ false, false, - false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); + /*DstAlign*/ Alignment, /*SrcAlign*/ Alignment, /*isVolatile*/ false, + false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } // Helper to get immediate/variable SSE shift opcode from other shift opcodes. diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h @@ -34,7 +34,7 @@ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -183,19 +183,14 @@ /// Returns the best type to use with repmovs depending on alignment. static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget, - uint64_t Align) { - assert((Align != 0) && "Align is normalized"); - assert(isPowerOf2_64(Align) && "Align is a power of 2"); - switch (Align) { - case 1: + Align Alignment) { + if (Alignment == Align(1)) return MVT::i8; - case 2: + if (Alignment == Align(2)) return MVT::i16; - case 4: + if (Alignment == Align(4)) return MVT::i32; - default: - return Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - } + return Subtarget.is64Bit() ? 
MVT::i64 : MVT::i32; } /// Returns a REP MOVS instruction, possibly with a few load/stores to implement @@ -205,7 +200,7 @@ static SDValue emitConstantSizeRepmov( SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT, - unsigned Align, bool isVolatile, bool AlwaysInline, + Align DstAlign, Align SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very @@ -220,10 +215,11 @@ assert(!Subtarget.hasERMSB() && "No efficient RepMovs"); /// We assume runtime memcpy will do a better job for unaligned copies when /// ERMS is not present. - if (!AlwaysInline && (Align & 3) != 0) + if (!AlwaysInline && DstAlign < 4 && SrcAlign < 4) return SDValue(); - const MVT BlockType = getOptimalRepmovsType(Subtarget, Align); + const MVT BlockType = + getOptimalRepmovsType(Subtarget, std::max(DstAlign, SrcAlign)); const uint64_t BlockBytes = BlockType.getSizeInBits() / 8; const uint64_t BlockCount = Size / BlockBytes; const uint64_t BytesLeft = Size % BlockBytes; @@ -252,7 +248,8 @@ Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), llvm::Align(Align), isVolatile, + DAG.getConstant(BytesLeft, dl, SizeVT), /*DstAlign*/ DstAlign, + /*SrcAlign*/ SrcAlign, isVolatile, /*AlwaysInline*/ true, /*isTailCall*/ false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); @@ -260,8 +257,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue 
Size, Align DstAlign, Align SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); @@ -284,7 +282,7 @@ if (ConstantSDNode *ConstantSize = dyn_cast(Size)) return emitConstantSizeRepmov( DAG, Subtarget, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Size.getValueType(), Alignment.value(), isVolatile, AlwaysInline, + Size.getValueType(), DstAlign, SrcAlign, isVolatile, AlwaysInline, DstPtrInfo, SrcPtrInfo); return SDValue(); diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -1392,8 +1392,8 @@ InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy( Chain, dl, FIN, ArgDI.SDV, DAG.getConstant(Size, dl, MVT::i32), - Alignment, false, false, false, MachinePointerInfo(), - MachinePointerInfo())); + /*DstAlign*/ Alignment, /*SrcAlign*/ Alignment, false, false, false, + MachinePointerInfo(), MachinePointerInfo())); } else { InVals.push_back(ArgDI.SDV); } @@ -1785,17 +1785,16 @@ unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits(); assert((StoreBits % 8) == 0 && "Store size in bits must be a multiple of 8"); - Align Alignment = ST->getAlign(); if (LoadSDNode *LD = dyn_cast(ST->getValue())) { if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() && - LD->getAlign() == Alignment && - !LD->isVolatile() && !LD->isIndexed() && - Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { + LD->getAlign() == ST->getAlign() && !LD->isVolatile() && + !LD->isIndexed() && + Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { bool isTail = isInTailCallPosition(DAG, ST, Chain); return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits / 8, dl, 
MVT::i32), - Alignment, false, isTail, + ST->getAlign(), LD->getAlign(), false, isTail, ST->getPointerInfo(), LD->getPointerInfo()); } } diff --git a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h --- a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h +++ b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -21,8 +21,8 @@ public: SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, Align Alignment, bool isVolatile, - bool AlwaysInline, + SDValue Op3, Align DstAlign, Align SrcAlign, + bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; }; diff --git a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp --- a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp +++ b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -17,11 +17,12 @@ SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, Align DstAlign, Align SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { unsigned SizeBitWidth = Size.getValueSizeInBits(); // Call __memcpy_4 if the src, dst and size are all 4 byte aligned. 
- if (!AlwaysInline && Alignment >= Align(4) && + if (!AlwaysInline && DstAlign >= Align(4) && SrcAlign >= Align(4) && DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) { const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); TargetLowering::ArgListTy Args; diff --git a/llvm/test/CodeGen/ARM/memcpy-inline.ll b/llvm/test/CodeGen/ARM/memcpy-inline.ll --- a/llvm/test/CodeGen/ARM/memcpy-inline.ll +++ b/llvm/test/CodeGen/ARM/memcpy-inline.ll @@ -128,20 +128,18 @@ define void @t6() nounwind { ; CHECK-LABEL: t6: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movw r0, :lower16:(L_.str6-(LPC6_0+4)) -; CHECK-NEXT: movt r0, :upper16:(L_.str6-(LPC6_0+4)) +; CHECK-NEXT: movw r9, :lower16:(L_.str6-(LPC6_0+4)) +; CHECK-NEXT: movt r9, :upper16:(L_.str6-(LPC6_0+4)) ; CHECK-NEXT: LPC6_0: -; CHECK-NEXT: add r0, pc -; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: add r9, pc ; CHECK-NEXT: movw r1, :lower16:(_spool.splbuf-(LPC6_1+4)) ; CHECK-NEXT: movt r1, :upper16:(_spool.splbuf-(LPC6_1+4)) ; CHECK-NEXT: LPC6_1: ; CHECK-NEXT: add r1, pc -; CHECK-NEXT: vstr d16, [r1] -; CHECK-NEXT: adds r1, #6 -; CHECK-NEXT: adds r0, #6 -; CHECK-NEXT: vld1.16 {d16}, [r0] -; CHECK-NEXT: vst1.16 {d16}, [r1] +; CHECK-NEXT: ldm.w r9!, {r0, r2, r3} +; CHECK-NEXT: stm r1!, {r0, r2, r3} +; CHECK-NEXT: ldrh.w r0, [r9] +; CHECK-NEXT: strh r0, [r1] ; CHECK-NEXT: bx lr entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false)